### Import Statement

In [50]:
import pandas as pd
import matplotlib.pylab as plt
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import RidgeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

### Reading data from CSV file

In [49]:
df = pd.read_csv('task2.csv')
df.fillna(0, inplace = True)
df

Unnamed: 0,ID,Segment type,Segment Description,Answer,Count,Percentage,It became a relationship
0,292890.8970,web,"Meridian, Idaho",No,0.000000,0.000000,0
1,292887.9870,web,"Meridian, Idaho",No,0.000000,0.000000,0
2,292894.0656,gender,"Meridian, Idaho",No,499.173606,0.225255,0
3,292887.1180,web,"Meridian, Idaho",No,0.000000,0.000000,0
4,292893.6561,gender,"Meridian, Idaho",No,455.925963,0.211360,0
...,...,...,...,...,...,...,...
1891,292887.5496,web,"Meridian, Idaho",No,0.000000,0.000000,0
1892,292881.6932,mobile,"Meridian, Idaho",No,1203.190399,0.312360,0
1893,292900.8499,gender,"Meridian, Idaho",No,806.378820,0.488025,0
1894,292893.8600,gender,"Meridian, Idaho",No,1149.529381,0.488984,0


In [59]:
df_input = df.drop(columns = ['It became a relationship','ID'])
y = df['It became a relationship'].values

## Processing the inputs

In [60]:
df_input['Segment type'] = pd.Categorical(pd.factorize(df_input['Segment type'])[0])
df_input['Segment Description'] = pd.Categorical(pd.factorize(df_input['Segment Description'])[0])
df_input['Answer'] = pd.Categorical(pd.factorize(df_input['Answer'])[0])

X = df_input.values

df_input

Unnamed: 0,Segment type,Segment Description,Answer,Count,Percentage
0,0,0,0,0.000000,0.000000
1,0,0,0,0.000000,0.000000
2,1,0,0,499.173606,0.225255
3,0,0,0,0.000000,0.000000
4,1,0,0,455.925963,0.211360
...,...,...,...,...,...
1891,0,0,0,0.000000,0.000000
1892,3,0,0,1203.190399,0.312360
1893,1,0,0,806.378820,0.488025
1894,1,0,0,1149.529381,0.488984


## Implementing LogisticRegression Model 

In [84]:
grid = GridSearchCV(
    estimator = LogisticRegression(),
    param_grid = {
        'class_weight' : [{0:1, 1:v} for v in range(1,4)]
    },
    cv = 10,
    n_jobs = -1
)
grid.fit(X,y).predict(X)

r1 = pd.DataFrame(grid.cv_results_).sort_values('rank_test_score',ascending = 1)

In [85]:
r1

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_class_weight,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,split5_test_score,split6_test_score,split7_test_score,split8_test_score,split9_test_score,mean_test_score,std_test_score,rank_test_score
0,0.03921,0.004216,0.0004,0.00049,"{0: 1, 1: 1}","{'class_weight': {0: 1, 1: 1}}",0.815789,0.778947,0.831579,0.836842,0.794737,0.826316,0.857143,0.825397,0.804233,0.825397,0.819638,0.021176,1
1,0.04141,0.005005,0.0008,0.0004,"{0: 1, 1: 2}","{'class_weight': {0: 1, 1: 2}}",0.805263,0.768421,0.763158,0.826316,0.747368,0.815789,0.830688,0.793651,0.777778,0.767196,0.789563,0.02749,2
2,0.035605,0.004863,0.0002,0.0004,"{0: 1, 1: 3}","{'class_weight': {0: 1, 1: 3}}",0.805263,0.768421,0.768421,0.831579,0.736842,0.8,0.809524,0.767196,0.767196,0.761905,0.781635,0.027073,3


## Implementing RidgeClassifier Model

In [86]:
from sklearn.model_selection import GridSearchCV

grid = GridSearchCV(
    estimator = RidgeClassifier(),
    param_grid = {
        'fit_intercept' : [True,False],
        'normalize' : [True,False],
        'class_weight' : [{0:1, 1:v} for v in range(1,4)]
    },
    cv = 10,
    n_jobs = -1
)
grid.fit(X,y).predict(X)

r2 = pd.DataFrame(grid.cv_results_).sort_values('rank_test_score',ascending = 1)

In [88]:
r2

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_class_weight,param_fit_intercept,param_normalize,params,split0_test_score,split1_test_score,...,split3_test_score,split4_test_score,split5_test_score,split6_test_score,split7_test_score,split8_test_score,split9_test_score,mean_test_score,std_test_score,rank_test_score
2,0.001901,0.0003,0.0001,0.0003,"{0: 1, 1: 1}",False,True,"{'class_weight': {0: 1, 1: 1}, 'fit_intercept'...",0.810526,0.789474,...,0.836842,0.794737,0.826316,0.84127,0.846561,0.809524,0.857143,0.82545,0.02196,1
3,0.001799,0.000402,0.0003,0.000458,"{0: 1, 1: 1}",False,False,"{'class_weight': {0: 1, 1: 1}, 'fit_intercept'...",0.810526,0.789474,...,0.836842,0.794737,0.826316,0.84127,0.846561,0.809524,0.857143,0.82545,0.02196,1
1,0.002202,0.0004,0.0001,0.0003,"{0: 1, 1: 1}",True,False,"{'class_weight': {0: 1, 1: 1}, 'fit_intercept'...",0.815789,0.784211,...,0.847368,0.794737,0.826316,0.867725,0.825397,0.78836,0.830688,0.82227,0.025661,3
5,0.002201,0.0004,0.000201,0.000402,"{0: 1, 1: 2}",True,False,"{'class_weight': {0: 1, 1: 2}, 'fit_intercept'...",0.805263,0.768421,...,0.831579,0.736842,0.815789,0.825397,0.777778,0.783069,0.756614,0.786917,0.029756,4
4,0.002601,0.000491,0.000401,0.000491,"{0: 1, 1: 2}",True,True,"{'class_weight': {0: 1, 1: 2}, 'fit_intercept'...",0.815789,0.763158,...,0.831579,0.736842,0.805263,0.804233,0.777778,0.772487,0.761905,0.783219,0.028075,5
0,0.002302,0.000459,0.0003,0.000458,"{0: 1, 1: 1}",True,True,"{'class_weight': {0: 1, 1: 1}, 'fit_intercept'...",0.815789,0.763158,...,0.763158,0.763158,0.768421,0.783069,0.756614,0.825397,0.798942,0.780086,0.023506,6
9,0.002001,1e-06,0.0002,0.0004,"{0: 1, 1: 3}",True,False,"{'class_weight': {0: 1, 1: 3}, 'fit_intercept'...",0.805263,0.768421,...,0.826316,0.726316,0.805263,0.809524,0.767196,0.761905,0.756614,0.779524,0.029119,7
6,0.001501,0.0005,0.0005,0.0005,"{0: 1, 1: 2}",False,True,"{'class_weight': {0: 1, 1: 2}, 'fit_intercept'...",0.742105,0.736842,...,0.794737,0.726316,0.752632,0.772487,0.78836,0.761905,0.703704,0.753172,0.026407,8
7,0.0012,0.000401,0.0007,0.000459,"{0: 1, 1: 2}",False,False,"{'class_weight': {0: 1, 1: 2}, 'fit_intercept'...",0.742105,0.736842,...,0.794737,0.726316,0.752632,0.772487,0.78836,0.761905,0.703704,0.753172,0.026407,8
8,0.001901,0.000301,0.000399,0.000489,"{0: 1, 1: 3}",True,True,"{'class_weight': {0: 1, 1: 3}, 'fit_intercept'...",0.757895,0.731579,...,0.794737,0.678947,0.789474,0.73545,0.714286,0.724868,0.719577,0.736786,0.033386,10


### Accuracy of First Model:

In [90]:
r1['mean_test_score'][0] * 100

81.96379838485102

### Accuracy of Second Model:

In [91]:
r2['mean_test_score'][0] * 100

78.00863269284322