In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split 
import time
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
import pickle
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier   
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as sfs


In [2]:
# Silence every Python warning for the rest of the kernel session so the
# model-fitting cells below produce clean output.
# NOTE(review): this is a blanket filter; it may also hide solver
# non-convergence warnings (e.g. from the SVC(max_iter=100) used in
# sfsFeature) -- consider narrowing it to specific categories.
import warnings
warnings.filterwarnings('ignore')

In [3]:
def sfsFeature(indep_x, dep_y, k_features=6, cv=5, scoring='accuracy'):
    """Run forward sequential feature selection once per base estimator.

    Four estimators are tried, in this order: logistic regression, a linear
    SVC (capped at 100 iterations), a decision tree and a random forest.
    For each one an mlxtend ``SequentialFeatureSelector`` is fitted on the
    full data and the reduced feature matrix is recorded.

    Parameters
    ----------
    indep_x : feature table passed to the selector's ``fit``/``transform``.
    dep_y : target labels passed to the selector's ``fit``.
    k_features : number of features each selector should keep (default 6,
        the value that used to be hard-coded).
    cv : number of cross-validation folds used to score candidate subsets
        (default 5).
    scoring : scoring metric name handed to the selector
        (default ``'accuracy'``).

    Returns
    -------
    tuple ``(SFSlist, SFSlist1, SFSlist2)`` of three parallel lists with one
    entry per estimator:
      * ``SFSlist``  -- the transformed data restricted to the selected features,
      * ``SFSlist1`` -- the selector's ``k_feature_idx_`` (selected column indices),
      * ``SFSlist2`` -- the selector's ``k_feature_names_`` (selected column names).
    """
    # Base estimators that drive the selection; order defines the order of
    # the entries in every returned list.
    estimators = [
        LogisticRegression(solver='lbfgs'),
        SVC(kernel='linear', max_iter=100, random_state=0),
        DecisionTreeClassifier(criterion='gini', max_features='sqrt', splitter='best', random_state=0),
        RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0),
    ]

    SFSlist, SFSlist1, SFSlist2 = [], [], []

    for estimator in estimators:
        selector = sfs(
            estimator,
            k_features=k_features,
            forward=True,
            floating=False,
            verbose=0,
            scoring=scoring,
            cv=cv,
            n_jobs=1,
        )
        selector.fit(indep_x, dep_y)

        # Keep the reduced data plus the identity of the chosen columns.
        SFSlist.append(selector.transform(indep_x))
        SFSlist1.append(selector.k_feature_idx_)
        SFSlist2.append(selector.k_feature_names_)

    return SFSlist, SFSlist1, SFSlist2
def split_scalar(indep_X,dep_Y):
    """Split the data 75/25 and standardise the features.

    The split uses ``test_size=0.25`` and ``random_state=0``. The
    ``StandardScaler`` is fitted on the training part only and then applied
    to the test part.

    Returns ``(X_train, X_test, y_train, y_test, sc)`` where the two feature
    matrices are the scaled versions and ``sc`` is the fitted scaler.
    """
    train_features, test_features, train_labels, test_labels = train_test_split(
        indep_X, dep_Y, test_size=0.25, random_state=0
    )

    # Fit the scaler on the training split, then reuse it for the test split.
    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(train_features)
    test_scaled = scaler.transform(test_features)

    return train_scaled, test_scaled, train_labels, test_labels, scaler
        
    
def cm_prediction(classifier, X_test, y_test=None):
    """Predict on ``X_test`` and compute evaluation metrics.

    Parameters
    ----------
    classifier : fitted estimator exposing ``predict``.
    X_test : test features passed to ``classifier.predict``.
    y_test : true labels for ``X_test``. Optional for backward
        compatibility: when omitted, the module-level (notebook-global)
        variable ``y_test`` is used, which is what this function always
        read implicitly before. Prefer passing it explicitly.

    Returns
    -------
    tuple ``(classifier, Accuracy, report, X_test, y_test, cm)`` where
    ``Accuracy`` comes from ``accuracy_score``, ``report`` from
    ``classification_report`` and ``cm`` from ``confusion_matrix``.

    Raises
    ------
    NameError
        If ``y_test`` is neither passed nor defined at module level.
    """
    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

    if y_test is None:
        # Legacy path: existing callers pass only (classifier, X_test) and
        # rely on a global ``y_test`` having been assigned beforehand.
        try:
            y_test = globals()['y_test']
        except KeyError:
            raise NameError(
                "name 'y_test' is not defined; pass y_test to cm_prediction explicitly"
            ) from None

    y_pred = classifier.predict(X_test)

    cm = confusion_matrix(y_test, y_pred)
    Accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    return classifier, Accuracy, report, X_test, y_test, cm


In [4]:
def logistic(X_train,y_train,X_test):
    """Fit ``LogisticRegression(random_state=0)`` and evaluate it on ``X_test``.

    Returns the 6-tuple produced by ``cm_prediction``:
    ``(classifier, Accuracy, report, X_test, y_test, cm)``.
    The true labels are not a parameter here; ``cm_prediction`` is called
    with the model and ``X_test`` only.
    """
    from sklearn.linear_model import LogisticRegression

    model = LogisticRegression(random_state=0)
    model.fit(X_train, y_train)
    return cm_prediction(model, X_test)
    
def svm_linear(X_train,y_train,X_test):
    """Fit an ``SVC`` with a linear kernel (``random_state=0``) and evaluate it.

    Returns the 6-tuple produced by ``cm_prediction``:
    ``(classifier, Accuracy, report, X_test, y_test, cm)``.
    The true labels are not a parameter here; ``cm_prediction`` is called
    with the model and ``X_test`` only.
    """
    from sklearn.svm import SVC

    model = SVC(kernel='linear', random_state=0)
    model.fit(X_train, y_train)
    return cm_prediction(model, X_test)
    
def svm_NL(X_train,y_train,X_test):
    """Fit an ``SVC`` with an RBF kernel (``random_state=0``) and evaluate it.

    Returns the 6-tuple produced by ``cm_prediction``:
    ``(classifier, Accuracy, report, X_test, y_test, cm)``.
    The true labels are not a parameter here; ``cm_prediction`` is called
    with the model and ``X_test`` only.
    """
    from sklearn.svm import SVC

    model = SVC(kernel='rbf', random_state=0)
    model.fit(X_train, y_train)
    return cm_prediction(model, X_test)

def Navie(X_train,y_train,X_test):
    """Fit a ``GaussianNB`` (naive Bayes) classifier and evaluate it on ``X_test``.

    Returns the 6-tuple produced by ``cm_prediction``:
    ``(classifier, Accuracy, report, X_test, y_test, cm)``.
    The true labels are not a parameter here; ``cm_prediction`` is called
    with the model and ``X_test`` only.
    """
    from sklearn.naive_bayes import GaussianNB

    model = GaussianNB()
    model.fit(X_train, y_train)
    return cm_prediction(model, X_test)
    
    
def knn(X_train,y_train,X_test):
    """Fit a 5-nearest-neighbours classifier (Minkowski metric, ``p=2``) and evaluate it.

    Returns the 6-tuple produced by ``cm_prediction``:
    ``(classifier, Accuracy, report, X_test, y_test, cm)``.
    The true labels are not a parameter here; ``cm_prediction`` is called
    with the model and ``X_test`` only.
    """
    from sklearn.neighbors import KNeighborsClassifier

    model = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
    model.fit(X_train, y_train)
    return cm_prediction(model, X_test)
    
def Decision(X_train,y_train,X_test):
    """Fit an entropy-criterion ``DecisionTreeClassifier`` (``random_state=0``) and evaluate it.

    Returns the 6-tuple produced by ``cm_prediction``:
    ``(classifier, Accuracy, report, X_test, y_test, cm)``.
    The true labels are not a parameter here; ``cm_prediction`` is called
    with the model and ``X_test`` only.
    """
    from sklearn.tree import DecisionTreeClassifier

    model = DecisionTreeClassifier(criterion='entropy', random_state=0)
    model.fit(X_train, y_train)
    return cm_prediction(model, X_test)


def random(X_train,y_train,X_test):
    """Fit a 10-tree entropy-criterion ``RandomForestClassifier`` (``random_state=0``) and evaluate it.

    Returns the 6-tuple produced by ``cm_prediction``:
    ``(classifier, Accuracy, report, X_test, y_test, cm)``.
    The true labels are not a parameter here; ``cm_prediction`` is called
    with the model and ``X_test`` only.
    """
    from sklearn.ensemble import RandomForestClassifier

    model = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)
    model.fit(X_train, y_train)
    return cm_prediction(model, X_test)
    

def sfs_classification(acclog, accsvml, accsvmnl, accknn, accnav, accdes, accr):
    """Tabulate classifier accuracies per feature-selection estimator.

    Each argument is a sequence of accuracies for one classifier, ordered by
    the estimator that drove feature selection: position 0 -> 'Logistic',
    1 -> 'SVC', 2 -> 'DT', 3 -> 'RF'. Only the first four entries of each
    sequence are read.

    Parameters
    ----------
    acclog, accsvml, accsvmnl, accknn, accnav, accdes, accr :
        accuracy sequences that fill the 'Logistic', 'SVMl', 'SVMnl', 'KNN',
        'Navie', 'Decision' and 'Random' columns respectively.

    Returns
    -------
    pandas.DataFrame with rows ['Logistic', 'SVC', 'DT', 'RF'] and one
    column per classifier.

    Raises
    ------
    IndexError
        If any sequence has fewer than four entries.
    """
    selector_rows = ['Logistic', 'SVC', 'DT', 'RF']

    # Column name -> accuracies. The 'Random' column uses the ``accr``
    # parameter; the previous code read a global named ``accrf`` instead,
    # silently ignoring the argument.
    column_scores = {
        'Logistic': acclog,
        'SVMl': accsvml,
        'SVMnl': accsvmnl,
        'KNN': accknn,
        'Navie': accnav,
        'Decision': accdes,
        'Random': accr,
    }

    sfsdataframe = pd.DataFrame(index=selector_rows, columns=list(column_scores))

    for column, scores in column_scores.items():
        for number, idex in enumerate(selector_rows):
            # .loc writes into the frame itself; chained ``df[col][row] = v``
            # may write to a temporary copy and be lost.
            sfsdataframe.loc[idex, column] = scores[number]

    return sfsdataframe

In [5]:
# Load the raw CSV; `dataset1` keeps the untouched frame.
dataset1 = pd.read_csv("solar_panel_classification_data.csv", index_col=None)

# Working copy without the "Panel" column.
df2 = dataset1.drop(columns="Panel")

# Features = everything except the target column; target = 'output_category'.
indep_X = df2.drop(columns=['output_category'])
y = df2['output_category']

# Encode the string class labels as integers for the classifiers.
from sklearn.preprocessing import LabelEncoder, StandardScaler
label_encoder = LabelEncoder()
dep_Y = label_encoder.fit_transform(y)

df2

Unnamed: 0,Efficiency,Degradation,Power(W),solar_irradiance,temperature,humidity,wind_speed,cloud_cover,panel_age,panel_tilt_angle,latitude,longitude,output_category
0,10.50,0.26,276,234.582148,27.409992,34.203594,12.909419,47.135301,8,40,43.162039,-80.402974,low
1,13.35,0.95,248,701.541503,20.095389,71.946512,2.424767,98.612964,4,20,33.483335,-97.290986,medium
2,11.59,0.67,248,800.323062,31.027154,60.910303,12.327964,70.145991,3,10,49.220731,-93.750434,low
3,14.53,0.48,384,264.976616,32.347156,60.115497,14.559179,66.752851,1,30,39.840162,-87.148791,low
4,12.29,0.22,299,314.577546,15.657046,81.578566,6.378646,39.263899,12,50,33.434963,-123.012506,low
...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,18.10,0.15,302,714.936187,22.766445,21.291239,3.602661,61.595182,4,10,33.858955,-88.585466,medium
996,12.30,0.65,226,736.401113,24.012641,45.690515,9.612694,19.704401,18,20,28.335311,-71.953013,high
997,10.12,0.56,291,863.499489,16.035945,67.038076,12.090799,75.375519,21,30,38.662189,-105.852577,medium
998,13.77,0.28,315,738.160525,31.421819,84.590728,14.336480,3.404805,22,10,37.980271,-113.286274,high


In [6]:
# Run sequential feature selection. At this point `SFSlist` holds the whole
# 3-tuple returned by sfsFeature; it is unpacked in the next cell.
SFSlist = sfsFeature(indep_X, dep_Y)

# One accuracy accumulator per downstream classifier (each a distinct list).
acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf = [], [], [], [], [], [], []

In [7]:
# sfsFeature returns a tuple of three parallel lists: transformed feature
# matrices, selected column indices and selected column names.
# After unpacking, `SFSlist` is rebound to the first list, so the guard keeps
# this cell safe to re-run (a second unguarded unpack would try to split the
# 4-element list into 3 names and fail).
if isinstance(SFSlist, tuple):
    SFSlist, SFSlist1, SFSlist2 = SFSlist

In [8]:
# Display the reduced feature matrices, one array per feature-selection
# estimator (order: logistic regression, linear SVC, decision tree, random forest).
# NOTE(review): this prints full arrays; showing shapes or the first rows
# would keep the output short.
SFSlist

[array([[2.60000000e-01, 2.34582148e+02, 2.74099918e+01, 1.29094187e+01,
         4.71353011e+01, 8.00000000e+00],
        [9.50000000e-01, 7.01541503e+02, 2.00953889e+01, 2.42476702e+00,
         9.86129639e+01, 4.00000000e+00],
        [6.70000000e-01, 8.00323062e+02, 3.10271543e+01, 1.23279636e+01,
         7.01459912e+01, 3.00000000e+00],
        ...,
        [5.60000000e-01, 8.63499489e+02, 1.60359449e+01, 1.20907991e+01,
         7.53755194e+01, 2.10000000e+01],
        [2.80000000e-01, 7.38160525e+02, 3.14218188e+01, 1.43364804e+01,
         3.40480472e+00, 2.20000000e+01],
        [3.80000000e-01, 3.16168286e+02, 3.90393966e+01, 2.95542813e+00,
         8.76727467e+01, 7.00000000e+00]]),
 array([[ 10.5       , 276.        , 234.5821483 ,  27.40999176,
          47.13530108,  40.        ],
        [ 13.35      , 248.        , 701.5415026 ,  20.09538889,
          98.61296386,  20.        ],
        [ 11.59      , 248.        , 800.3230625 ,  31.02715428,
          70.14599119,  

In [9]:
 # Display the selected column indices, one tuple per feature-selection estimator.
 SFSlist1

[(1, 3, 4, 6, 7, 8),
 (0, 2, 3, 4, 7, 9),
 (0, 1, 2, 3, 4, 7),
 (0, 3, 4, 5, 7, 9)]

In [10]:
# Display the selected column names, one tuple per feature-selection estimator.
SFSlist2

[('Degradation',
  'solar_irradiance',
  'temperature',
  'wind_speed',
  'cloud_cover',
  'panel_age'),
 ('Efficiency',
  'Power(W)',
  'solar_irradiance',
  'temperature',
  'cloud_cover',
  'panel_tilt_angle'),
 ('Efficiency',
  'Degradation',
  'Power(W)',
  'solar_irradiance',
  'temperature',
  'cloud_cover'),
 ('Efficiency',
  'solar_irradiance',
  'temperature',
  'humidity',
  'cloud_cover',
  'panel_tilt_angle')]

In [11]:
# Start from empty accumulators every time this cell runs. They used to be
# created only in an earlier cell, so re-running this cell appended duplicate
# scores while sfs_classification kept reading the stale first four entries.
acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf = [], [], [], [], [], [], []

# (training/evaluation function, list that collects its accuracy) pairs,
# in the same order the classifiers were evaluated before.
evaluators = [
    (logistic, acclog),
    (svm_linear, accsvml),
    (svm_NL, accsvmnl),
    (knn, accknn),
    (Navie, accnav),
    (Decision, accdes),
    (random, accrf),
]

# One pass per feature subset produced by sequential feature selection.
for i in SFSlist:

    # NOTE: the evaluation helpers read `y_test` from notebook scope, so it
    # must be (re)assigned here before any of them is called.
    X_train, X_test, y_train, y_test, sc = split_scalar(i, dep_Y)

    for evaluate, scores in evaluators:
        # Names are kept at notebook scope so they stay available after the
        # loop (they hold the last model evaluated).
        classifier, Accuracy, report, X_test, y_test, cm = evaluate(X_train, y_train, X_test)
        scores.append(Accuracy)

result = sfs_classification(acclog, accsvml, accsvmnl, accknn, accnav, accdes, accrf)


In [12]:
# Accuracy table: rows = estimator used for feature selection,
# columns = classifier trained on the selected features.
result
# NOTE(review): the original comment here was just "6" -- presumably the
# number of selected features (k_features=6) for this run; TODO confirm.

Unnamed: 0,Logistic,SVMl,SVMnl,KNN,Navie,Decision,Random
Logistic,0.78,0.792,0.94,0.86,0.864,1,0.996
SVC,0.772,0.768,0.912,0.848,0.88,1,1.0
DT,0.764,0.76,0.932,0.864,0.88,1,0.996
RF,0.764,0.76,0.924,0.868,0.872,1,1.0
