In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np 
import time
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import RFE
from sklearn.feature_selection import chi2
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import pickle
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier   
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier


In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
def rfeFeature(indep_x,dep_y,n):
    rfelist=[]
    
    log_model=LogisticRegression(solver='lbfgs')
    
    RF=RandomForestClassifier(n_estimators=10,criterion='entropy',random_state=0)
    #NB=GaussianNB()
    
    DT=DecisionTreeClassifier(criterion='gini',max_features='sqrt',splitter='best',random_state=0)
    svc_model=SVC(kernel="linear",random_state=0)
    
    #knn=KNeighborsClassifier(n_neighbors=5,metrics='minkowski',p=2)
    rfemodellist=[log_model,svc_model,RF,DT]
    
    for i in rfemodellist:
        print(i)
        log_rfe=RFE(estimator=i, n_features_to_select=n)
        log_fit=log_rfe.fit(indep_x,dep_y)
        
        selected_features = indep_x.columns[log_fit.support_]
        print("Selected Features:", selected_features)
        log_rfe_feature=log_fit.transform(indep_x)
        rfelist.append(log_rfe_feature)
    return rfelist           
   

In [4]:
def split_scalar(indep_x,dep_y):
    x_train,x_test,y_train,y_test=train_test_split(indep_x,dep_y,test_size=0.25,random_state=0)
    sc=StandardScaler()
    x_train=sc.fit_transform(x_train)
    x_test=sc.transform(x_test)
    return x_train,x_test,y_train,y_test



In [5]:
def cm_prediction(classifier,x_test):
    y_pred=classifier.predict(x_test)
    from sklearn.metrics import confusion_matrix
    cm=confusion_matrix(y_test,y_pred)
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import classification_report
    accuracy=accuracy_score(y_test,y_pred)
    report=classification_report(y_test,y_pred)
    return classifier,accuracy, report,x_test,y_test,cm
 

In [6]:
def Logistic(x_train,y_train,x_test):
    from sklearn.linear_model import LogisticRegression
    classifier= LogisticRegression(random_state=0)
    classifier.fit(x_train,y_train)
    classifier,accuracy, report,x_test,y_test,cm=cm_prediction(classifier,x_test)
    return classifier,accuracy, report,x_test,y_test,cm


def svm_linear(x_train,y_train,x_test):
    from sklearn.svm import SVC
    classifier= SVC(kernel='linear',random_state=0)
    classifier.fit(x_train,y_train)
    classifier,accuracy, report,x_test,y_test,cm=cm_prediction(classifier,x_test)
    return classifier,accuracy, report,x_test,y_test,cm

    
def svm_NL(x_train,y_train,x_test):
    from sklearn.svm import SVC
    classifier= SVC(kernel='rbf',random_state=0)
    classifier.fit(x_train,y_train)
    classifier,accuracy, report,x_test,y_test,cm=cm_prediction(classifier,x_test)
    return classifier,accuracy, report,x_test,y_test,cm 

def knn(x_train,y_train,x_test):
    from sklearn.neighbors import KNeighborsClassifier
    classifier= KNeighborsClassifier(n_neighbors=5,metric='minkowski',p=2)
    classifier.fit(x_train,y_train)
    classifier,accuracy, report,x_test,y_test,cm=cm_prediction(classifier,x_test)
    return classifier,accuracy, report,x_test,y_test,cm

def Naive(x_train,y_train,x_test):
    from sklearn.naive_bayes import GaussianNB
    classifier= GaussianNB()
    classifier.fit(x_train,y_train)
    classifier,accuracy, report,x_test,y_test,cm=cm_prediction(classifier,x_test)
    return classifier,accuracy, report,x_test,y_test,cm

def Decision(x_train,y_train,x_test):
    from sklearn.tree import DecisionTreeClassifier
    classifier= DecisionTreeClassifier(criterion='entropy',random_state=0)
    classifier.fit(x_train,y_train)
    classifier,accuracy, report,x_test,y_test,cm=cm_prediction(classifier,x_test)
    return classifier,accuracy, report,x_test,y_test,cm

def random(x_train,y_train,x_test):
    from sklearn.ensemble import RandomForestClassifier
    classifier=RandomForestClassifier(n_estimators=10,criterion='entropy',random_state=0)
    classifier.fit(x_train,y_train)
    classifier,accuracy, report,x_test,y_test,cm=cm_prediction(classifier,x_test)
    return classifier,accuracy, report,x_test,y_test,cm


In [7]:
def rfe_classification(acclog,accsvml,accsnmnl,accknn,accnav,accdes,accrf):
    rfedataframe=pd.DataFrame(index=['Logistic','SVC','Decision Tree','Random'],
                              columns=['Logistic','SVMLIN','SVMNL','KNN','Naive','Decision','Random'])
    
    for number, idex in enumerate(rfedataframe.index):
        rfedataframe['Logistic'][idex]= acclog [number]
        rfedataframe[ 'SVMLIN'][idex]= accsvml [number]
        rfedataframe['SVMNL'][idex]= accsnmnl [number]
        rfedataframe['KNN'][idex]= accknn [number]
        rfedataframe['Naive'][idex]= accnav [number]
        rfedataframe['Decision'][idex]=accdes[number]
        rfedataframe['Random'][idex]=  accrf [number]
    return rfedataframe
    
   




In [8]:
dataset1=pd.read_csv('solar_panel_classification_data.csv',index_col=None)

In [9]:
df2=dataset1
df2

Unnamed: 0,Panel,Efficiency,Degradation,Power(W),solar_irradiance,temperature,humidity,wind_speed,cloud_cover,panel_age,panel_tilt_angle,latitude,longitude,output_category
0,Panel_C,10.50,0.26,276,234.582148,27.409992,34.203594,12.909419,47.135301,8,40,43.162039,-80.402974,low
1,Panel_C,13.35,0.95,248,701.541503,20.095389,71.946512,2.424767,98.612964,4,20,33.483335,-97.290986,medium
2,Panel_A,11.59,0.67,248,800.323062,31.027154,60.910303,12.327964,70.145991,3,10,49.220731,-93.750434,low
3,Panel_B,14.53,0.48,384,264.976616,32.347156,60.115497,14.559179,66.752851,1,30,39.840162,-87.148791,low
4,Panel_C,12.29,0.22,299,314.577546,15.657046,81.578566,6.378646,39.263899,12,50,33.434963,-123.012506,low
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,Panel_B,18.10,0.15,302,714.936187,22.766445,21.291239,3.602661,61.595182,4,10,33.858955,-88.585466,medium
996,Panel_B,12.30,0.65,226,736.401113,24.012641,45.690515,9.612694,19.704401,18,20,28.335311,-71.953013,high
997,Panel_C,10.12,0.56,291,863.499489,16.035945,67.038076,12.090799,75.375519,21,30,38.662189,-105.852577,medium
998,Panel_B,13.77,0.28,315,738.160525,31.421819,84.590728,14.336480,3.404805,22,10,37.980271,-113.286274,high


In [10]:
df2 = df2.drop("Panel", axis=1)
indep_x = df2.drop(['output_category'], axis=1)
y = df2['output_category']

from sklearn.preprocessing import LabelEncoder, StandardScaler
label_encoder = LabelEncoder()
dep_y = label_encoder.fit_transform(y)
df2

Unnamed: 0,Efficiency,Degradation,Power(W),solar_irradiance,temperature,humidity,wind_speed,cloud_cover,panel_age,panel_tilt_angle,latitude,longitude,output_category
0,10.50,0.26,276,234.582148,27.409992,34.203594,12.909419,47.135301,8,40,43.162039,-80.402974,low
1,13.35,0.95,248,701.541503,20.095389,71.946512,2.424767,98.612964,4,20,33.483335,-97.290986,medium
2,11.59,0.67,248,800.323062,31.027154,60.910303,12.327964,70.145991,3,10,49.220731,-93.750434,low
3,14.53,0.48,384,264.976616,32.347156,60.115497,14.559179,66.752851,1,30,39.840162,-87.148791,low
4,12.29,0.22,299,314.577546,15.657046,81.578566,6.378646,39.263899,12,50,33.434963,-123.012506,low
...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,18.10,0.15,302,714.936187,22.766445,21.291239,3.602661,61.595182,4,10,33.858955,-88.585466,medium
996,12.30,0.65,226,736.401113,24.012641,45.690515,9.612694,19.704401,18,20,28.335311,-71.953013,high
997,10.12,0.56,291,863.499489,16.035945,67.038076,12.090799,75.375519,21,30,38.662189,-105.852577,medium
998,13.77,0.28,315,738.160525,31.421819,84.590728,14.336480,3.404805,22,10,37.980271,-113.286274,high


In [19]:
rfelist=rfeFeature(indep_x,dep_y,4)

acclog=[]
accsvml=[]
accsvmnl=[]
accknn=[]
accnav=[]
accdes=[]
accrf=[]

LogisticRegression()
Selected Features: Index(['temperature', 'wind_speed', 'cloud_cover', 'latitude'], dtype='object')
SVC(kernel='linear', random_state=0)
Selected Features: Index(['Degradation', 'temperature', 'wind_speed', 'cloud_cover'], dtype='object')
RandomForestClassifier(criterion='entropy', n_estimators=10, random_state=0)
Selected Features: Index(['Efficiency', 'solar_irradiance', 'temperature', 'cloud_cover'], dtype='object')
DecisionTreeClassifier(max_features='sqrt', random_state=0)
Selected Features: Index(['solar_irradiance', 'temperature', 'cloud_cover', 'latitude'], dtype='object')


In [20]:
for i in rfelist:
    x_train,x_test,y_train,y_test=split_scalar(i,dep_y)

    classifier, accuracy, report, x_test, y_test, cm = Logistic(x_train, y_train, x_test)
    acclog.append(accuracy)
    classifier,accuracy, report,x_test,y_test,cm=svm_linear(x_train,y_train,x_test)
    accsvml.append(accuracy)
    classifier,accuracy, report,x_test,y_test,cm=svm_NL(x_train,y_train,x_test)
    accsvmnl.append(accuracy)
    classifier,accuracy, report,x_test,y_test,cm=knn(x_train,y_train,x_test)
    accknn.append(accuracy)
    classifier,accuracy, report,x_test,y_test,cm=Naive(x_train,y_train,x_test)
    accnav.append(accuracy)
    classifier,accuracy, report,x_test,y_test,cm=Decision(x_train,y_train,x_test)
    accdes.append(accuracy)
    classifier,accuracy, report,x_test,y_test,cm=random(x_train,y_train,x_test)
    accrf.append(accuracy)

    
result=rfe_classification(acclog,accsvml,accsvmnl,accknn,accnav,accdes,accrf)
    
    

In [21]:
result
#4

Unnamed: 0,Logistic,SVMLIN,SVMNL,KNN,Naive,Decision,Random
Logistic,0.664,0.688,0.724,0.7,0.68,0.7,0.704
SVC,0.664,0.684,0.744,0.712,0.684,0.704,0.732
Decision Tree,0.76,0.764,0.944,0.936,0.88,1.0,1.0
Random,0.776,0.772,0.952,0.9,0.868,1.0,1.0


In [18]:
result
#5

Unnamed: 0,Logistic,SVMLIN,SVMNL,KNN,Naive,Decision,Random
Logistic,0.648,0.68,0.72,0.648,0.664,0.696,0.76
SVC,0.668,0.68,0.744,0.712,0.664,0.68,0.736
Decision Tree,0.772,0.764,0.94,0.916,0.86,1.0,1.0
Random,0.776,0.776,0.948,0.892,0.876,1.0,1.0


In [15]:
result
#6

Unnamed: 0,Logistic,SVMLIN,SVMNL,KNN,Naive,Decision,Random
Logistic,0.652,0.684,0.708,0.64,0.672,0.7,0.748
SVC,0.66,0.692,0.724,0.636,0.668,0.712,0.752
Decision Tree,0.764,0.764,0.932,0.856,0.86,1.0,1.0
Random,0.784,0.784,0.924,0.848,0.888,1.0,1.0
