In [72]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, r2_score, f1_score
from sklearn.linear_model import LogisticRegression
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.pipeline import Pipeline

In [73]:
df = pd.read_csv('Glass.csv')
df

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.0,building_windows_float_processed
1,1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.0,building_windows_float_processed
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.0,building_windows_float_processed
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.0,building_windows_float_processed
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.0,building_windows_float_processed
...,...,...,...,...,...,...,...,...,...,...
209,1.51623,14.14,0.00,2.88,72.61,0.08,9.18,1.06,0.0,headlamps
210,1.51685,14.92,0.00,1.99,73.06,0.00,8.40,1.59,0.0,headlamps
211,1.52065,14.36,0.00,2.02,73.42,0.00,8.44,1.64,0.0,headlamps
212,1.51651,14.38,0.00,1.94,73.61,0.00,8.48,1.57,0.0,headlamps


In [74]:
X = df.drop('Type', axis=1)
y = df['Type']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=24, stratify=y)

In [75]:
penalties = ['l1', 'l2', 'elasticnet', None]
l1 = np.linspace(0.001,1,5)
multi = ['ovr', 'multinomial']
scores = []

scaler = MinMaxScaler()


for i in penalties:
    for l in l1:
        for m in multi:
            lr = LogisticRegression(random_state= 24 , penalty=i , solver='saga', l1_ratio=l, multi_class=m)
            pipe = Pipeline([('Scaling', scaler), ('LR', lr)])
            pipe.fit(X_train, y_train)
            y_pred = pipe.predict(X_test)
            scores.append([i, l ,m ,  accuracy_score(y_test,y_pred)])

scores        

df1 = pd.DataFrame(scores , columns=['Penalty', 'l1', 'Multiclass','Accuracy'])
df1.sort_values('Accuracy',ascending=False ).iloc[0]  





Penalty              None
l1                    1.0
Multiclass    multinomial
Accuracy         0.630769
Name: 39, dtype: object

# On Image segmentation Data

In [88]:
df = pd.read_csv('Image_Segmentation.csv')

In [90]:
df


Unnamed: 0,Class,region.centroid.col,region.centroid.row,region.pixel.count,short.line.density.5,short.line.density.2,vedge.mean,vegde.sd,hedge.mean,hedge.sd,intensity.mean,rawred.mean,rawblue.mean,rawgreen.mean,exred.mean,exblue.mean,exgreen.mean,value.mean,saturation.mean,hue-mean
0,BRICKFACE,188,133,9,0.000000,0.0,0.333333,0.266667,0.500000,0.077778,6.666666,8.333334,7.777778,3.888889,5.000000,3.333333,-8.333333,8.444445,0.538580,-0.924817
1,BRICKFACE,105,139,9,0.000000,0.0,0.277778,0.107407,0.833333,0.522222,6.111111,7.555555,7.222222,3.555556,4.333334,3.333333,-7.666666,7.555555,0.532628,-0.965946
2,BRICKFACE,34,137,9,0.000000,0.0,0.500000,0.166667,1.111111,0.474074,5.851852,7.777778,6.444445,3.333333,5.777778,1.777778,-7.555555,7.777778,0.573633,-0.744272
3,BRICKFACE,39,111,9,0.000000,0.0,0.722222,0.374074,0.888889,0.429629,6.037037,7.000000,7.666666,3.444444,2.888889,4.888889,-7.777778,7.888889,0.562919,-1.175773
4,BRICKFACE,16,128,9,0.000000,0.0,0.500000,0.077778,0.666667,0.311111,5.555555,6.888889,6.666666,3.111111,4.000000,3.333333,-7.333334,7.111111,0.561508,-0.985811
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
204,GRASS,36,243,9,0.111111,0.0,1.888889,1.851851,2.000000,0.711110,13.333333,9.888889,12.111111,18.000000,-10.333333,-3.666667,14.000000,18.000000,0.452229,2.368311
205,GRASS,186,218,9,0.000000,0.0,1.166667,0.744444,1.166667,0.655555,13.703704,10.666667,12.666667,17.777779,-9.111111,-3.111111,12.222222,17.777779,0.401347,2.382684
206,GRASS,197,236,9,0.000000,0.0,2.444444,6.829628,3.333333,7.599998,16.074074,13.111111,16.666668,18.444445,-8.888889,1.777778,7.111111,18.555555,0.292729,2.789800
207,GRASS,208,240,9,0.111111,0.0,1.055556,0.862963,2.444444,5.007407,14.148149,10.888889,13.000000,18.555555,-9.777778,-3.444444,13.222222,18.555555,0.421621,2.392487


In [92]:
X = df.drop('Class', axis = 1)
y = df['Class']

In [94]:
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=24, stratify=y)

In [110]:
df_test = pd.read_csv('tst_img.csv')

In [126]:
penalties = ['l1','l2','elasticnet', None]
l1 = np.linspace(0.001,1,5)
multi = ['ovr', 'multinomial']
scores = []

scaler = StandardScaler()

for i in penalties:
    for l in l1:
        for m in multi:
            lr = LogisticRegression(penalty=i,solver='saga',l1_ratio= l,multi_class=m)
            pipe = Pipeline([('Scaling', scaler), ('LR', lr)])
            pipe.fit(X,y)
            y_pred = pipe.predict(df_test)
            scores.append(y_pred)

# df1 = pd.DataFrame(scores , columns=['Accuracy'])
# df1.sort_values('Accuracy',ascending=False ).iloc[0]  
scores



[array(['SKY', 'PATH', 'GRASS', 'GRASS'], dtype=object),
 array(['SKY', 'PATH', 'GRASS', 'GRASS'], dtype=object),
 array(['SKY', 'PATH', 'GRASS', 'GRASS'], dtype=object),
 array(['SKY', 'PATH', 'GRASS', 'GRASS'], dtype=object),
 array(['SKY', 'PATH', 'GRASS', 'GRASS'], dtype=object),
 array(['SKY', 'PATH', 'GRASS', 'GRASS'], dtype=object),
 array(['SKY', 'PATH', 'GRASS', 'GRASS'], dtype=object),
 array(['SKY', 'PATH', 'GRASS', 'GRASS'], dtype=object),
 array(['SKY', 'PATH', 'GRASS', 'GRASS'], dtype=object),
 array(['SKY', 'PATH', 'GRASS', 'GRASS'], dtype=object),
 array(['SKY', 'PATH', 'GRASS', 'GRASS'], dtype=object),
 array(['SKY', 'PATH', 'GRASS', 'GRASS'], dtype=object),
 array(['SKY', 'PATH', 'GRASS', 'GRASS'], dtype=object),
 array(['SKY', 'PATH', 'GRASS', 'GRASS'], dtype=object),
 array(['SKY', 'PATH', 'GRASS', 'GRASS'], dtype=object),
 array(['SKY', 'PATH', 'GRASS', 'GRASS'], dtype=object),
 array(['SKY', 'PATH', 'GRASS', 'GRASS'], dtype=object),
 array(['SKY', 'PATH', 'GRASS',

In [106]:
df1 = df1.sort_values('Accuracy', ascending = False)

In [108]:
df1

Unnamed: 0,Penalty,l1,Multiclass,Accuracy
38,,1.0,ovr,0.873016
36,,0.75025,ovr,0.873016
34,,0.5005,ovr,0.873016
32,,0.25075,ovr,0.873016
30,,0.001,ovr,0.873016
0,l1,0.001,ovr,0.857143
14,l2,0.5005,ovr,0.857143
28,elasticnet,1.0,ovr,0.857143
26,elasticnet,0.75025,ovr,0.857143
24,elasticnet,0.5005,ovr,0.857143


In [118]:
df_test

Unnamed: 0,region.centroid.col,region.centroid.row,region.pixel.count,short.line.density.5,short.line.density.2,vedge.mean,vegde.sd,hedge.mean,hedge.sd,intensity.mean,rawred.mean,rawblue.mean,rawgreen.mean,exred.mean,exblue.mean,exgreen.mean,value.mean,saturation.mean,hue-mean
0,22,90,10,0,0,0.666668,0.044444,0.88,0.562963,112.0,105.888885,128.55556,106.0,-22.777779,45.22222,-22.444445,128.55556,0.179697,-2.097815
1,210,200,9,0,0,1.3,0.998145,1.611111,1.123816,49.48148,45.0,60.666668,43.0,-14.111111,35.0,-19.444445,60.666668,0.290788,-1.987599
2,240,184,9,0,0,0.5,0.077778,0.777778,0.785185,11.851851,9.777778,9.888889,15.888889,-5.0,-5.888889,13.0,15.888889,0.5,2.128646
3,130,191,9,0,0,1.0,0.4,1.5,1.011111,7.333334,5.333334,5.0,11.222222,-7.0,-5.666666,11.666667,11.222222,0.53582,2.122422


In [130]:
# For best 

lr = LogisticRegression(penalty=None,solver='saga',multi_class='ovr')
pipe = Pipeline([('Scaling', scaler), ('LR', lr)])
pipe.fit(X,y)
y_pred = pipe.predict(df_test)
y_pred



array(['SKY', 'PATH', 'GRASS', 'GRASS'], dtype=object)

In [136]:
probs = pipe.predict_proba(df_test)
pd.DataFrame(probs)0

Unnamed: 0,0,1,2,3,4,5,6
0,0.031713,0.069505,0.002517,0.005464,0.09656,0.794117,0.000125
1,0.016612,0.150398,0.001873,0.008519,0.792577,0.015593,0.01443
2,0.007736,0.000942,0.003468,0.905009,0.004012,0.008414,0.070419
3,0.015462,0.000449,0.019907,0.927433,0.004196,0.005679,0.026874


In [140]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
le_y = le.fit_transform(y)
le_y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
       6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3])

In [142]:
le.classes_

array(['BRICKFACE', 'CEMENT', 'FOLIAGE', 'GRASS', 'PATH', 'SKY', 'WINDOW'],
      dtype=object)

In [152]:
lr = LogisticRegression(penalty=None,solver='saga',multi_class='ovr')
pipe = Pipeline([('Scaling', scaler), ('LR', lr)])
pipe.fit(X,le_y)
y_pred = pipe.predict(df_test)
y_pred



array([5, 4, 3, 3])

In [146]:
pipe

In [148]:
probs = pipe.predict_proba(df_test)
pd.DataFrame(probs, columns = le.classes_)

Unnamed: 0,BRICKFACE,CEMENT,FOLIAGE,GRASS,PATH,SKY,WINDOW
0,0.031704,0.06949,0.002493,0.00533,0.096988,0.793868,0.000127
1,0.016569,0.15052,0.001877,0.008408,0.792534,0.015637,0.014455
2,0.007666,0.000921,0.003499,0.904896,0.004052,0.008484,0.070481
3,0.015316,0.000439,0.020171,0.927243,0.004229,0.005724,0.026878


In [154]:
le.inverse_transform(y_pred)

array(['SKY', 'PATH', 'GRASS', 'GRASS'], dtype=object)