In [32]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression,LogisticRegressionCV
from sklearn.model_selection import GridSearchCV,train_test_split

In [8]:
train_dataset_path= r"E:\data science\Projects\US-VISA-APPROVAL-PREDICTION\artifact\17_08_2024_13_58_31\data_transformation\transformed_data\training_data.npy"
test_dataset_path = r"E:\data science\Projects\US-VISA-APPROVAL-PREDICTION\artifact\17_08_2024_13_58_31\data_transformation\transformed_data\test_data.npy"

In [10]:
train_df = np.load(train_dataset_path)
test_df = np.load(test_dataset_path)

In [34]:
train_df,test_df,y_train_df,y_test_df = train_df[:,:-1], test_df[:,:-1], train_df[:,-1], test_df[:,-1]

In [38]:
X_train,X_test,y_train,y_test= train_test_split(train_df,y_train_df,test_size=0.25)

In [48]:
lg_model = LogisticRegression()
lg_model.fit(X_train,y_train)
print(lg_model.score(X_train,y_train))
print(lg_model.score(X_test,y_test))

0.840500310109572
0.8269767441860465


In [42]:
y_pred = lg_model.predict(X_test)
y_pred

array([1., 0., 0., ..., 0., 1., 1.])

In [44]:
y_pred_proba = lg_model.predict_proba(X_test)
y_pred_proba

array([[0.20200091, 0.79799909],
       [0.91401522, 0.08598478],
       [0.77275277, 0.22724723],
       ...,
       [0.63630593, 0.36369407],
       [0.09872768, 0.90127232],
       [0.10425749, 0.89574251]])

In [68]:
lg_model = LogisticRegression(penalty='elasticnet',solver='saga',l1_ratio=0.5) # elasticnet only supported by saga check documentation
lg_model.fit(X_train,y_train)
print(lg_model.score(X_train,y_train))
print(lg_model.score(X_test,y_test))

0.8403969402522224
0.8269767441860465




In [70]:
lg_model = LogisticRegression(penalty='l1',solver='liblinear') # elasticnet only supported by saga check documentation
lg_model.fit(X_train,y_train)
print(lg_model.score(X_train,y_train))
print(lg_model.score(X_test,y_test))

0.8400868306801736
0.8269767441860465


In [90]:
lg_model = LogisticRegression(penalty='l1',solver='liblinear',C=0.01) # elasticnet only supported by saga check documentation
lg_model.fit(X_train,y_train)
print(lg_model.score(X_train,y_train))
print(lg_model.score(test_df,y_test_df))

0.835641926814141
0.8121114437025098


In [94]:
logreg_cv = LogisticRegressionCV(
    Cs=[0.2,0.5,1,5,10],               # Number of values for C to try (or an array of C values)
    cv=5,                # Number of cross-validation folds
    penalty='l2',        # Type of regularization ('l1', 'l2', 'elasticnet')
    solver='lbfgs',      # Optimization algorithm
    scoring='accuracy',  # Scoring metric to optimize
    max_iter=1000,       # Maximum number of iterations
    random_state=42,     # Random state for reproducibility
    multi_class='auto'   # Automatically selects between 'ovr' and 'multinomial'
)

In [129]:
logreg_cv.fit(X_train,y_train)
print(logreg_cv.score(X_train,y_train))
print(logreg_cv.score(X_test,y_test))
print(logreg_cv.score(test_df,y_test_df))

0.8401902005375232
0.826046511627907
0.8098088878655307


In [123]:
from sklearn.svm import SVC,LinearSVC,NuSVC

##### SVM

In [102]:
svc = SVC()
svc.fit(X_train,y_train)
print(svc.score(X_train,y_train))
print(svc.score(test_df,y_test_df))

0.8545586107091172
0.8072760764448538


In [115]:
svc = SVC(C=0.01)
svc.fit(X_train,y_train)
print(svc.score(X_train,y_train))
print(svc.score(test_df,y_test_df))

0.8170353524912135
0.78240847340548


In [116]:
svc = LinearSVC(max_iter=25000,dual='auto')
svc.fit(X_train,y_train)
print(svc.score(X_train,y_train))
print(svc.score(test_df,y_test_df))

0.8386396526772794
0.8063550541100621


In [121]:
svc = SVC(kernel='linear')
svc.fit(X_train,y_train)
print(svc.score(X_train,y_train))
print(svc.score(test_df,y_test_df))

0.8283026669423196
0.7959935528436565


In [127]:
svc = NuSVC()
svc.fit(X_train,y_train)
print(svc.score(X_train,y_train))
print(svc.score(X_test,y_test))
print(svc.score(test_df,y_test_df))


0.846909241265247
0.8418604651162791
0.8056642873589684
