In [None]:
import time
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.svm import SVC
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingGridSearchCV

In [5]:
import os
from google.colab import drive
drive.mount("/content/drive/")
os.chdir("drive/My Drive/Colab Notebooks/CSE 512 Project")

Mounted at /content/drive/


In [None]:
df = pd.read_csv("normalized_numerical_data.csv")
y = np.array(df['gender'])
X = df.drop('gender',axis=1)
num_feat = X.shape[1]

X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=42)

In [None]:
start = time.time()

param_grid_poly = [{'C': [0.1,10,20,30,40,50,60,70,80,90,100], 'gamma': [10,1,0.1,0.01,0.001,0.0001], 'kernel': ['poly']},]


svc = SVC()
halving_cv_poly = HalvingGridSearchCV(
    svc, param_grid_poly, scoring="accuracy", n_jobs=-1, min_resources="exhaust", factor=3
)
halving_cv_poly.fit(X_train, y_train)
best_score_poly = halving_cv_poly.best_score_
best_params_poly = halving_cv_poly.best_params_



print("Best Parameters:", best_params_poly)
print("Best Score:", best_score_poly)

end = time.time()
print("Runtime (sec.):", end - start)

Best Parameters: {'C': 60, 'gamma': 0.1, 'kernel': 'poly'}
Best Score: 0.9717146433041302
Runtime (sec.): 4.113746881484985


In [None]:
poly_svm = SVC(C=60, gamma=0.1,kernel='poly')
poly_svm.fit(X_train, y_train)

SVC(C=60, gamma=0.1, kernel='poly')

In [None]:
y_pred = poly_svm.predict(X_test)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[494   8]
 [ 26 473]]
              precision    recall  f1-score   support

           0       0.95      0.98      0.97       502
           1       0.98      0.95      0.97       499

    accuracy                           0.97      1001
   macro avg       0.97      0.97      0.97      1001
weighted avg       0.97      0.97      0.97      1001



In [None]:
poly_res = pd.DataFrame({"y_actual": y_test, "y_pred": y_pred})

In [None]:
poly_res.to_csv("poly_svm_res.csv")

In [None]:
start = time.time()

param_grid_rbf = [{'C': [0.1,10,20,30,40,50,60,70,80,90,100], 'gamma': [10,1,0.1,0.01,0.001,0.0001], 'kernel': ['rbf']},]


svc = SVC()
halving_cv_rbf = HalvingGridSearchCV(
    svc, param_grid_rbf, scoring="accuracy", n_jobs=-1, min_resources="exhaust", factor=3
)
halving_cv_rbf.fit(X_train, y_train)
best_score_rbf = halving_cv_rbf.best_score_
best_params_rbf = halving_cv_rbf.best_params_



print("Best Parameters:", best_params_rbf)
print("Best Score:", best_score_rbf)

end = time.time()
print("Runtime (sec.):", end - start)

Best Parameters: {'C': 50, 'gamma': 0.1, 'kernel': 'rbf'}
Best Score: 0.9727158948685857
Runtime (sec.): 5.88062047958374


In [None]:
rbf_svm = SVC(C=50, gamma=0.1,kernel='rbf')
rbf_svm.fit(X_train, y_train)

SVC(C=50, gamma=0.1)

In [None]:
y_pred = rbf_svm.predict(X_test)

In [None]:
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[493   9]
 [ 25 474]]
              precision    recall  f1-score   support

           0       0.95      0.98      0.97       502
           1       0.98      0.95      0.97       499

    accuracy                           0.97      1001
   macro avg       0.97      0.97      0.97      1001
weighted avg       0.97      0.97      0.97      1001



In [None]:
rbf_res = pd.DataFrame({"y_actual": y_test, "y_pred": y_pred})
rbf_res.to_csv("rbf_svm_res.csv")

In [None]:
start = time.time()

param_grid_sig = [{'C': [0.1,10,20,30,40,50,60,70,80,90,100], 'gamma': [10,1,0.1,0.01,0.001,0.0001], 'kernel': ['sigmoid']},]


svc = SVC()
halving_cv_sig = HalvingGridSearchCV(
    svc, param_grid_sig, scoring="accuracy", n_jobs=-1, min_resources="exhaust", factor=3
)
halving_cv_sig.fit(X_train, y_train)
best_score_sig = halving_cv_sig.best_score_
best_params_sig = halving_cv_sig.best_params_



print("Best Parameters:", best_params_sig)
print("Best Score:", best_score_sig)

end = time.time()
print("Runtime (sec.):", end - start)

Best Parameters: {'C': 10, 'gamma': 0.01, 'kernel': 'sigmoid'}
Best Score: 0.9667083854818523
Runtime (sec.): 5.426228761672974


In [None]:
sig_svm = SVC(C=10, gamma=0.01,kernel='sigmoid')
sig_svm.fit(X_train, y_train)

SVC(C=10, gamma=0.01, kernel='sigmoid')

In [None]:
y_pred = sig_svm.predict(X_test)

In [None]:
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[483  19]
 [ 19 480]]
              precision    recall  f1-score   support

           0       0.96      0.96      0.96       502
           1       0.96      0.96      0.96       499

    accuracy                           0.96      1001
   macro avg       0.96      0.96      0.96      1001
weighted avg       0.96      0.96      0.96      1001



In [None]:
sig_res = pd.DataFrame({"y_actual": y_test, "y_pred": y_pred})
sig_res.to_csv("sig_svm_res.csv")

In [None]:
X = pd.read_csv("X_data.csv")
y = pd.read_csv("y_data.csv")

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [None]:
# get shape of feature matrix
print('Feature matrix shape is: ', X.shape)

# define standard scaler
ss = StandardScaler()
# run this on our feature matrix
X_stand = ss.fit_transform(X)

pca = PCA(n_components=500)
# use fit_transform to run PCA on our standardized matrix
X_pca = ss.fit_transform(X_stand)
# look at new shape
print('PCA matrix shape is: ', X_pca.shape)

Feature matrix shape is:  (58657, 2500)
PCA matrix shape is:  (58657, 2500)


In [None]:
y = y.iloc[:, 0]
X_train, X_test, y_train, y_test = train_test_split( X_pca, y.to_numpy(), test_size=0.2, random_state=42)

In [None]:
start = time.time()

param_grid_poly = [{'C': [25,50,75,100], 'gamma': [0.1,0.01,0.001], 'kernel': ['poly']},]


svc = SVC()
halving_cv_poly = HalvingGridSearchCV(
    svc, param_grid_poly, scoring="accuracy", n_jobs=-1, min_resources="smallest", factor=3
)
halving_cv_poly.fit(X_train, y_train)
best_score_poly = halving_cv_poly.best_score_
best_params_poly = halving_cv_poly.best_params_



print("Best Parameters:", best_params_poly)
print("Best Score:", best_score_poly)

end = time.time()
print("Runtime (sec.):", end - start)

In [None]:
poly_svm = SVC(C=100, gamma=0.0001,kernel='poly')
poly_svm.fit(X_train, y_train)

SVC(C=100, gamma=0.0001, kernel='poly')

In [None]:
y_hat_test = poly_svm.decision_function(X_test)
y_pred = poly_svm.predict(X_test)
y_decision_test = (y_hat_test >= 0)
R_test = (y_decision_test == y_test)
Accuracy_test = np.sum(R_test) / len(y_decision_test)

In [None]:
FeatImportance = poly_svm.feature_importances_
print(FeatImportance)

In [None]:
Output = X_test
Output.insert(7, "gender", y_test)
Output.insert(8, "gender_predicted", y_hat_test)
Output.to_csv('SVMPolyNumericalResults.csv')

print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))


In [None]:
import time
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, confusion_matrix


X = pd.read_csv("X_data.csv")
y = pd.read_csv("y_data.csv")

ss = StandardScaler()
X_stand = ss.fit_transform(X)

pca = PCA(n_components=500)
X_pca = ss.fit_transform(X_stand)

y = y.iloc[:, 0]
X_train, X_test, y_train, y_test = train_test_split( X_pca, y, test_size=0.2, random_state=42)

poly_svm = SVC(C=100, gamma=0.001,kernel='poly')
poly_svm.fit(X_train, y_train)

y_hat_test = poly_svm.decision_function(X_test)
y_pred = poly_svm.predict(X_test)
y_decision_test = (y_hat_test >= 0)
R_test = (y_decision_test == y_test)
Accuracy_test = np.sum(R_test) / len(y_decision_test)

FeatImportance = poly_svm.feature_importances_
print(FeatImportance)

Output = pd.DataFrame({"gender":y_test, "gender_predicted":y_hat_test})
Output.to_csv('SVMPolyImageResults.csv')

print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

In [9]:
import pandas as pd
import numpy as np
#import torch.nn as nn
from sklearn.model_selection import train_test_split
from numpy import loadtxt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow import keras
from sklearn.pipeline import make_pipeline
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingGridSearchCV
from sklearn import preprocessing
import math
import warnings
from tensorflow import get_logger
from keras.wrappers.scikit_learn import KerasClassifier
from scikeras.wrappers import KerasClassifier



print ("Here1")

X = pd.read_csv("X_data.csv")
y = pd.read_csv('y_data.csv')
y = y.iloc[:, 0]
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=400)

print ("Here2")

le = preprocessing.LabelEncoder()
le.fit(y_train)
y_train=le.transform(y_train)
y_train
X_test.reset_index()

print ("Here3")

def generateLayersNodes(n,input_nodes, output_nodes):
    layers = []
    change = (output_nodes-input_nodes)/ (n-1)
    nodes = input_nodes
    for i in range(1,n+1):
        layers.append(math.ceil(nodes))
        nodes+=change
    return layers

def createModel(n_layers,in_nodes,out_nodes, ac, lf,lr):
    model = Sequential()
    nodes_for_layers = generateLayersNodes(n_layers,in_nodes,out_nodes)
    for i in range(1,n_layers):
        if i == 1:
            model.add(Dense(in_nodes,input_shape = (7,),activation =ac ))
        else:
            model.add(Dense(nodes_for_layers[i-1],activation =ac ))
    model.add(Dense(1,activation=ac))
    model.compile(loss=lf,optimizer=tf.keras.optimizers.Adam(learning_rate=lr),metrics=['accuracy'])
    return model

def get_clf(meta, hidden_layer_sizes, dropout):
    n_features_in_ = meta["n_features_in_"]
    n_classes_ = meta["n_classes_"]
    model = keras.models.Sequential()
    model.add(keras.layers.Input(shape=(n_features_in_,)))
    for hidden_layer_size in hidden_layer_sizes:
        model.add(keras.layers.Dense(hidden_layer_size, activation="relu"))
        model.add(keras.layers.Dropout(dropout))
    model.add(keras.layers.Dense(1, activation="sigmoid"))
    return model

clf = KerasClassifier(
    model=get_clf,
    loss="binary_crossentropy",
    optimizer="adam",
    optimizer__lr=0.1,
    model__hidden_layer_sizes=(100,),
    model__dropout=0.5,
    verbose=False,
)

params = {
    'optimizer__lr': [0.05, 0.1],
    'model__hidden_layer_sizes': [(100, ), (50, 50, )],
    'model__dropout': [0, 0.5],
}

gs = HalvingGridSearchCV(clf, params, scoring='accuracy', n_jobs=-1, verbose=True)

print ("Here4")

gs.fit(X_train, y_train)

print ("Here5")

print(gs.best_score_, gs.best_params_)

f = open("ann_params.txt", "w")
f.write("Best Score : "+str(gs.best_score_))
f.write(str(gs.best_params_))
f.close()

print ("Here6")

Here1
Here2
Here3
Here4
n_iterations: 2
n_required_iterations: 2
n_possible_iterations: 2
min_resources_: 15641
max_resources_: 46925
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 8
n_resources: 15641
Fitting 5 folds for each of 8 candidates, totalling 40 fits




----------
iter: 1
n_candidates: 3
n_resources: 46923
Fitting 5 folds for each of 3 candidates, totalling 15 fits


  super(Adam, self).__init__(name, **kwargs)


Here5
0.5885976129582268 {'model__dropout': 0, 'model__hidden_layer_sizes': (50, 50), 'optimizer__lr': 0.05}
Here6


In [2]:
!pip install scikeras

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting scikeras
  Downloading scikeras-0.9.0-py3-none-any.whl (27 kB)
Installing collected packages: scikeras
Successfully installed scikeras-0.9.0
