In [50]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Reading the data, Adding column names to it, and merging it as one

In [51]:
df1 = pd.read_csv('train_data.tsv', sep='\t')
df2 = pd.read_csv('test_data.tsv', sep='\t')
df3 = pd.read_csv('Validation_data.tsv', sep='\t')
print(df1.shape)
print(df2.shape)
print(df3.shape)
df1.columns = ['index', 'id', 'label', 'statement', 'subject', 'speaker', 'JobTitle', 'State', 'Party', 'BTC', 'FC', 'HT', 'MT', 'POF', 'context', 'justification']
df2.columns = ['index', 'id', 'label', 'statement', 'subject', 'speaker', 'JobTitle', 'State', 'Party', 'BTC', 'FC', 'HT', 'MT', 'POF', 'context', 'justification']
df3.columns = ['index', 'id', 'label', 'statement', 'subject', 'speaker', 'JobTitle', 'State', 'Party', 'BTC', 'FC', 'HT', 'MT', 'POF', 'context', 'justification']
df = pd.concat([df1, df2, df3], axis=0)
print(df.shape)
df.head()

(2, 1)
(2, 1)
(2, 1)


ValueError: Length mismatch: Expected axis has 1 elements, new values have 16 elements

In [None]:
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
df_copy=df.copy()
df_copy.drop(['statement','subject','justification'],axis=1,inplace=True)
df_copy['id'] = df_copy['id'].apply(lambda x: x[:-5])
df_copy['id'] = df_copy['id'].astype('int64')
le = LabelEncoder()
df_copy['label'] = le.fit_transform(df_copy['label'])
df_copy['speaker'] = le.fit_transform(df_copy['speaker'])
df_copy['JobTitle'] = le.fit_transform(df_copy['JobTitle'])
df_copy['State'] = le.fit_transform(df_copy['State'])
df_copy['Party'] = le.fit_transform(df_copy['Party'])
print(df_copy.head())
corr_matrix = df_copy.corr()
plt.figure(figsize=(11,11))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
plt.show()

In [None]:
sns.scatterplot(x='BTC',y='FC',data=df_copy)
plt.show()
sns.scatterplot(x='POF',y='HT',data=df_copy)
plt.show()
sns.scatterplot(x='MT',y='FC',data=df_copy)
plt.show()

Dropping non-required columns

In [None]:
df = df.drop(['index', 'id', 'JobTitle', 'State', 'BTC', 'FC', 'HT', 'MT', 'POF', 'context', 'justification'], axis=1)
df.head()

Converting data into binary classification

In [None]:
df['label'] = df['label'].map({'true': 1, 'half-true': 1, 'mostly-true': 1, 'false': 0, 'pants-fire': 0, 'barely-true': 0})
df.head()

Merge the statement and subject columns into one column for analysis

In [None]:
df['text'] = df['subject'] + ' ' + df['statement']
df = df.drop(['subject', 'statement'], axis=1)
df.head()

Dropping missing values rows

In [None]:
print("Number of missing values in each column:")
print(df.isnull().sum())
print("We drop the missing values")
df = df.dropna()
print("The shape of the dataset is now: ", df.shape)

Converting data into lowercase

In [None]:
df = df.apply(lambda x: x.astype(str).str.lower())
print(df.head())

Removing punctuations except comma, any links and any extra white spaces

In [None]:
df['text'] = df['text'].str.replace('[^\w\s,]', '')
df['text'] = df['text'].str.replace('http\S+|www.\S+', '', case=False)
df['text'] = df['text'].str.replace(' ,', ',')
df['text'] = df['text'].str.replace(', ', ',')

In [None]:
print(df.head())

Tokenization of text column

In [None]:
from nltk.tokenize import RegexpTokenizer
tokenizer = RegexpTokenizer(r'\w+')
df['text'] = df['text'].apply(lambda x: tokenizer.tokenize(x))
print(df.head())

Lemmatization of text column

In [None]:
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
df['text'] = df['text'].apply(lambda x: [lemmatizer.lemmatize(y) for y in x])
print(df.head())

Stop Words removal from text column

In [None]:
from nltk.corpus import stopwords
stop = stopwords.words('english')
df['text'] = df['text'].apply(lambda x: [item for item in x if item not in stop])
print(df.head())

Joining text column into a string for vectorization 

In [None]:
df['text'] = df['text'].apply(lambda x: ' '.join(x))

Visualization of data 

In [None]:
from wordcloud import WordCloud
all_words = ' '.join([text for text in df['text']])
wordcloud = WordCloud(width=800, height=500, random_state=21, max_font_size=110).generate(all_words)
plt.figure(figsize=(10, 7))
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis('off')
plt.show()

Using TFIDF and BOW for vectorization

In [21]:
def to_vector_Tfidf(df, col):
    from sklearn.feature_extraction.text import TfidfVectorizer
    vectorizer = TfidfVectorizer(max_features=2000)
    vectorizer.fit(df[col])
    return vectorizer.transform(df[col])

def to_vector_bow(df, col):
    from sklearn.feature_extraction.text import CountVectorizer
    vectorizer = CountVectorizer(max_features=2000)
    vectorizer.fit(df[col])
    return vectorizer.transform(df[col])

In [None]:
text_vector_tfidf = to_vector_Tfidf(df, 'text')
print("Shape of the tfidf vector: ", text_vector_tfidf.shape)
print(text_vector_tfidf.shape)
text_vector_bow = to_vector_bow(df, 'text')
print("Shape of the text vector for bow vectorization: ", text_vector_bow.shape)
print(text_vector_bow.shape)

# running pca

In [None]:
text_vector_tfidf_copy=text_vector_tfidf.copy()
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
pca.fit_transform(text_vector_tfidf_copy.toarray())
plt.figure(figsize=(10, 7))
plt.scatter(pca.components_[0], pca.components_[1])
plt.xlabel('First Principal Component')
plt.ylabel('Second Principal Component')
plt.title('PCA on tfidf vector')
plt.show()

text_vector_bow_copy=text_vector_bow.copy()
pca = PCA(n_components=2)
pca.fit_transform(text_vector_bow_copy.toarray())
plt.figure(figsize=(10, 7))
plt.scatter(pca.components_[0], pca.components_[1])
plt.xlabel('First Principal Component')
plt.ylabel('Second Principal Component')
plt.title('PCA on bow vector')
plt.show()

In [None]:
from sklearn.manifold import TSNE
tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300)
tsne_text_vector_tfidf_tsne_copy=text_vector_tfidf.copy()
tsne_text_vector_tfidf_tsne_copy = tsne.fit_transform(tsne_text_vector_tfidf_tsne_copy.toarray())
plt.figure(figsize=(10, 7))
import seaborn as sns
sns.scatterplot(x=tsne_text_vector_tfidf_tsne_copy[:,0], y=tsne_text_vector_tfidf_tsne_copy[:,1], hue=df['label'])
plt.title('TSNE on tfidf vector')
plt.show()

In [None]:
tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300)
tsne_text_vector_bow_tsne_copy=text_vector_bow.copy()
tsne_text_vector_bow_tsne_copy = tsne.fit_transform(tsne_text_vector_bow_tsne_copy.toarray())
plt.figure(figsize=(10, 7))
sns.scatterplot(x=tsne_text_vector_bow_tsne_copy[:,0], y=tsne_text_vector_bow_tsne_copy[:,1], hue=df['label'])
plt.title('TSNE on bow vector')
plt.show()

In [None]:
text_vector_tfidf = text_vector_tfidf.toarray()
text_vector_bow = text_vector_bow.toarray()

In [None]:
label_vector = df['label'].values
speaker_vector = df['speaker'].values
party_vector = df['Party'].values
label_vector = label_vector.reshape(-1, 1)
speaker_vector = speaker_vector.reshape(-1, 1)
party_vector = party_vector.reshape(-1, 1)

In [None]:
dataF1 = np.concatenate((text_vector_tfidf, label_vector), axis=1)
dataF2 = np.concatenate((text_vector_bow, label_vector), axis=1)
dataF3 = np.concatenate((text_vector_tfidf, label_vector, speaker_vector, party_vector), axis=1)
dataF4 = np.concatenate((text_vector_bow, label_vector, speaker_vector, party_vector), axis=1)
print(dataF1.shape)
print(dataF2.shape)
print(dataF3.shape)
print(dataF4.shape)

In [None]:
dataF1=pd.DataFrame(dataF1)
dataF2=pd.DataFrame(dataF2)
dataF3=pd.DataFrame(dataF3)
dataF4=pd.DataFrame(dataF4)
print(dataF1.head())

In [None]:
for i in range(2000):
    dataF1.rename(columns={i: 'tfidf'+str(i)}, inplace=True)
    dataF2.rename(columns={i: 'bow'+str(i)}, inplace=True)
    dataF3.rename(columns={i: 'tfidf'+str(i)}, inplace=True)
    dataF4.rename(columns={i: 'bow'+str(i)}, inplace=True)
dataF1.rename(columns={2000: 'label'}, inplace=True)
dataF2.rename(columns={2000: 'label'}, inplace=True)
dataF3.rename(columns={2000: 'label'}, inplace=True)
dataF4.rename(columns={2000: 'label'}, inplace=True)
dataF3.rename(columns={2001: 'speaker'}, inplace=True)
dataF4.rename(columns={2001: 'speaker'}, inplace=True)
dataF3.rename(columns={2002: 'party'}, inplace=True)
dataF4.rename(columns={2002: 'party'}, inplace=True)

In [None]:
from sklearn.preprocessing import LabelEncoder
le3=LabelEncoder()
le4=LabelEncoder()
dataF3['speaker']=le3.fit_transform(dataF3['speaker'])
dataF4['speaker']=le4.fit_transform(dataF4['speaker'])
dataF3['party']=le3.fit_transform(dataF3['party'])
dataF4['party']=le4.fit_transform(dataF4['party'])

In [29]:
from sklearn.model_selection import train_test_split
X_train1, X_test1, y_train1, y_test1 = train_test_split(dataF1.drop('label', axis=1), dataF1['label'], test_size=0.2, random_state=0)
X_val1, X_test1, y_val1, y_test1 = train_test_split(X_test1, y_test1, test_size=0.5, random_state=0)
X_train2, X_test2, y_train2, y_test2 = train_test_split(dataF2.drop('label', axis=1), dataF2['label'], test_size=0.2, random_state=0)
X_val2, X_test2, y_val2, y_test2 = train_test_split(X_test2, y_test2, test_size=0.5, random_state=0)
X_train3, X_test3, y_train3, y_test3 = train_test_split(dataF3.drop('label', axis=1), dataF3['label'], test_size=0.2, random_state=0)
X_val3, X_test3, y_val3, y_test3 = train_test_split(X_test3, y_test3, test_size=0.5, random_state=0)
X_train4, X_test4, y_train4, y_test4 = train_test_split(dataF4.drop('label', axis=1), dataF4['label'], test_size=0.2, random_state=0)
X_val4, X_test4, y_val4, y_test4 = train_test_split(X_test4, y_test4, test_size=0.5, random_state=0)

In [30]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

In [31]:
from sklearn.linear_model import LogisticRegression

In [32]:
from sklearn.tree import DecisionTreeClassifier

In [33]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
print(dataF1.head())
print(dataF3.head())

# grid search and learning curves 

In [35]:
from sklearn.model_selection import learning_curve
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

In [36]:
def gauss_grid_learn(X_train,y_train,X_val,y_val):
    gnb=GaussianNB()
    param_grid = {'var_smoothing': np.logspace(0,-9, num=10)}
    grid_search = GridSearchCV(gnb, param_grid, cv=3,refit=True,n_jobs=-1,scoring='accuracy',verbose=1)
    grid_search.fit(X_train,y_train)
    print(grid_search.best_params_)
    print(grid_search.best_score_)
    print("Accuracy: ",accuracy_score(y_val,grid_search.predict(X_val)))
    print(classification_report(y_val,grid_search.predict(X_val)))
    train_sizes, train_scores, test_scores = learning_curve(grid_search.best_estimator_, X_train, y_train, cv=3, scoring='accuracy', n_jobs=-1, verbose=1,shuffle=True)
    train_mean=np.mean(train_scores,axis=1)
    train_std=np.std(train_scores,axis=1)
    test_mean=np.mean(test_scores,axis=1)
    test_std=np.std(test_scores,axis=1)
    plt.plot(train_sizes,train_mean,color='blue',marker='o',label='training accuracy')
    plt.fill_between(train_sizes,train_mean+train_std,train_mean-train_std,alpha=0.15,color='blue')
    plt.plot(train_sizes,test_mean,color='green',linestyle='--',marker='s',label='validation accuracy')
    plt.fill_between(train_sizes,test_mean+test_std,test_mean-test_std,alpha=0.15,color='green')
    plt.title("Learning Curve for Gaussian Naive Bayes")
    plt.xlabel("Training Set Size")
    plt.ylabel("Accuracy Score")
    plt.legend(loc='best')
    plt.show()

def logistic_grid_learn(X_train,y_train,X_val,y_val):
    logreg=LogisticRegression(random_state=0,max_iter=20000)
    param_grid = {'C': [0.1, 1, 10],'tol': [0.0001, 0.001, 1, 10]}
    grid_search = GridSearchCV(logreg, param_grid, cv=3,refit=True,n_jobs=-1,scoring='accuracy',verbose=1)
    grid_search.fit(X_train,y_train)
    print(grid_search.best_params_)
    print(grid_search.best_score_)
    print("Accuracy: ",accuracy_score(y_val,grid_search.predict(X_val)))
    print(classification_report(y_val,grid_search.predict(X_val)))
    train_sizes, train_scores, test_scores = learning_curve(grid_search.best_estimator_, X_train, y_train, cv=3, scoring='accuracy', n_jobs=-1, verbose=1,shuffle=True)
    train_mean=np.mean(train_scores,axis=1)
    train_std=np.std(train_scores,axis=1)
    test_mean=np.mean(test_scores,axis=1)
    test_std=np.std(test_scores,axis=1)
    plt.plot(train_sizes,train_mean,color='blue',marker='o',label='training accuracy')
    plt.fill_between(train_sizes,train_mean+train_std,train_mean-train_std,alpha=0.15,color='blue')
    plt.plot(train_sizes,test_mean,color='green',linestyle='--',marker='s',label='validation accuracy')
    plt.fill_between(train_sizes,test_mean+test_std,test_mean-test_std,alpha=0.15,color='green')
    plt.title("Learning Curve for Logistic Regression")
    plt.xlabel("Training Set Size")
    plt.ylabel("Accuracy Score")
    plt.legend(loc='best')
    plt.show()

def decision_grid_learn(X_train,y_train,X_val,y_val):
    dtree=DecisionTreeClassifier(random_state=0)
    param_grid = {'criterion':["gini","entropy"],'max_depth': [10, 50, 100, None]}
    grid_search = GridSearchCV(dtree, param_grid, cv=3,refit=True,n_jobs=-1,scoring='accuracy',verbose=1)
    grid_search.fit(X_train,y_train)
    print(grid_search.best_params_)
    print(grid_search.best_score_)
    print("Accuracy: ",accuracy_score(y_val,grid_search.predict(X_val)))
    print(classification_report(y_val,grid_search.predict(X_val)))
    train_sizes, train_scores, test_scores = learning_curve(grid_search.best_estimator_, X_train, y_train, cv=3, scoring='accuracy', n_jobs=-1, verbose=1,shuffle=True)
    train_mean=np.mean(train_scores,axis=1)
    train_std=np.std(train_scores,axis=1)
    test_mean=np.mean(test_scores,axis=1)
    test_std=np.std(test_scores,axis=1)
    plt.plot(train_sizes,train_mean,color='blue',marker='o',label='training accuracy')
    plt.fill_between(train_sizes,train_mean+train_std,train_mean-train_std,alpha=0.15,color='blue')
    plt.plot(train_sizes,test_mean,color='green',linestyle='--',marker='s',label='validation accuracy')
    plt.fill_between(train_sizes,test_mean+test_std,test_mean-test_std,alpha=0.15,color='green')
    plt.title("Learning Curve for Decision Tree")
    plt.xlabel("Training Set Size")
    plt.ylabel("Accuracy Score")
    plt.legend(loc='best')
    plt.show()

def random_grid_learn(X_train,y_train,X_val,y_val):
    rtree=RandomForestClassifier(random_state=0)
    param_grid = {'max_depth': [10,50, 100, None],'criterion': ['gini', 'entropy']}
    grid_search = GridSearchCV(rtree, param_grid, cv=3,refit=True,n_jobs=-1,scoring='accuracy',verbose=1)
    grid_search.fit(X_train,y_train)
    print(grid_search.best_params_)
    print(grid_search.best_score_)
    print("Accuracy: ",accuracy_score(y_val,grid_search.predict(X_val)))
    print(classification_report(y_val,grid_search.predict(X_val)))
    train_sizes, train_scores, test_scores = learning_curve(grid_search.best_estimator_, X_train, y_train, cv=3, scoring='accuracy', n_jobs=-1, verbose=1,shuffle=True)
    train_mean=np.mean(train_scores,axis=1)
    train_std=np.std(train_scores,axis=1)
    test_mean=np.mean(test_scores,axis=1)
    test_std=np.std(test_scores,axis=1)
    plt.plot(train_sizes,train_mean,color='blue',marker='o',label='training accuracy')
    plt.fill_between(train_sizes,train_mean+train_std,train_mean-train_std,alpha=0.15,color='blue')
    plt.plot(train_sizes,test_mean,color='green',linestyle='--',marker='s',label='validation accuracy')
    plt.fill_between(train_sizes,test_mean+test_std,test_mean-test_std,alpha=0.15,color='green')
    plt.title("Learning Curve for Random Forest")
    plt.xlabel("Training Set Size")
    plt.ylabel("Accuracy Score")
    plt.legend(loc='best')
    plt.show()

def ada_grid_learn(X_train,y_train,X_val,y_val):
    ada=AdaBoostClassifier(random_state=0)
    param_grid = {'n_estimators': [10, 50],'learning_rate': [0.1,0.5,1,2]}
    grid_search = GridSearchCV(ada, param_grid, cv=3,refit=True,n_jobs=-1,scoring='accuracy',verbose=1)
    grid_search.fit(X_train,y_train)
    print(grid_search.best_params_)
    print(grid_search.best_score_)
    print("Accuracy: ",accuracy_score(y_val,grid_search.predict(X_val)))
    print(classification_report(y_val,grid_search.predict(X_val)))
    train_sizes, train_scores, test_scores = learning_curve(grid_search.best_estimator_, X_train, y_train, cv=3, scoring='accuracy', n_jobs=-1, verbose=1,shuffle=True)
    train_mean=np.mean(train_scores,axis=1)
    train_std=np.std(train_scores,axis=1)
    test_mean=np.mean(test_scores,axis=1)
    test_std=np.std(test_scores,axis=1)
    plt.plot(train_sizes,train_mean,color='blue',marker='o',label='training accuracy')
    plt.fill_between(train_sizes,train_mean+train_std,train_mean-train_std,alpha=0.15,color='blue')
    plt.plot(train_sizes,test_mean,color='green',linestyle='--',marker='s',label='validation accuracy')
    plt.fill_between(train_sizes,test_mean+test_std,test_mean-test_std,alpha=0.15,color='green')
    plt.title("Learning Curve for AdaBoost")
    plt.xlabel("Training Set Size")
    plt.ylabel("Accuracy Score")
    plt.legend(loc='best')
    plt.show()

def svm_grid_learn(X_train,y_train,X_val,y_val):
    svm=SVC(random_state=0)
    param_grid={'C':[0.1,1,100],'kernel':['rbf','linear']}
    grid_search = GridSearchCV(svm, param_grid, cv=3,refit=True,n_jobs=-1,scoring='accuracy',verbose=1)
    grid_search.fit(X_train,y_train)
    print(grid_search.best_params_)
    print(grid_search.best_score_)
    print("Accuracy: ",accuracy_score(y_val,grid_search.predict(X_val)))
    print(classification_report(y_val,grid_search.predict(X_val)))
    train_sizes, train_scores, test_scores = learning_curve(grid_search.best_estimator_, X_train, y_train, cv=3, scoring='accuracy', n_jobs=-1, verbose=1,shuffle=True)
    train_mean=np.mean(train_scores,axis=1)
    train_std=np.std(train_scores,axis=1)
    test_mean=np.mean(test_scores,axis=1)
    test_std=np.std(test_scores,axis=1)
    plt.plot(train_sizes,train_mean,color='blue',marker='o',label='training accuracy')
    plt.fill_between(train_sizes,train_mean+train_std,train_mean-train_std,alpha=0.15,color='blue')
    plt.plot(train_sizes,test_mean,color='green',linestyle='--',marker='s',label='validation accuracy')
    plt.fill_between(train_sizes,test_mean+test_std,test_mean-test_std,alpha=0.15,color='green')
    plt.title("Learning Curve for SVM")
    plt.xlabel("Training Set Size")
    plt.ylabel("Accuracy Score")
    plt.legend(loc='best')
    plt.show()

def mlp_grid_learn(X_train,y_train,X_val,y_val):
    mlp=MLPClassifier(random_state=0,early_stopping=True,validation_fraction=0.2)
    param_grid={'activation':["relu","logistic"]}
    grid_search = GridSearchCV(mlp, param_grid, cv=3,refit=True,n_jobs=-1,scoring='accuracy',verbose=1)
    grid_search.fit(X_train,y_train)
    print(grid_search.best_params_)
    print(grid_search.best_score_)
    print("Accuracy: ",accuracy_score(y_val,grid_search.predict(X_val)))
    print(classification_report(y_val,grid_search.predict(X_val)))
    plt.plot(grid_search.best_estimator_.loss_curve_)
    plt.plot(grid_search.best_estimator_.validation_scores_)
    plt.title("Loss Curve and Validation Score")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend(["Loss Curve","Validation Score"])
    plt.show()

def mlp_pca_grid_learn(X_train,y_train,X_val,y_val):
    X_train_copy=X_train.copy()
    X_val_copy=X_val.copy()
    y_train_copy=y_train.copy()
    y_val_copy=y_val.copy()
    pca=PCA()
    X_train_copy=pca.fit_transform(X_train_copy)
    X_val_copy=pca.transform(X_val_copy)
    mlp=MLPClassifier(random_state=0,early_stopping=True,validation_fraction=0.2)
    param_grid={'activation':["relu","logistic"]}
    grid_search = GridSearchCV(mlp, param_grid, cv=3,refit=True,n_jobs=-1,scoring='accuracy',verbose=1)
    grid_search.fit(X_train_copy,y_train_copy)
    print(grid_search.best_params_)
    print(grid_search.best_score_)
    print("Accuracy: ",accuracy_score(y_val_copy,grid_search.predict(X_val_copy)))
    print(classification_report(y_val_copy,grid_search.predict(X_val_copy)))
    plt.plot(grid_search.best_estimator_.loss_curve_)
    plt.plot(grid_search.best_estimator_.validation_scores_)
    plt.title("Loss Curve and Validation Score")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend(["Loss Curve","Validation Score"])
    plt.show()

def mlp_tsne_grid_learn(X_train,y_train,X_val,y_val):
    X_train_copy=X_train.copy()
    X_val_copy=X_val.copy()
    y_train_copy=y_train.copy()
    y_val_copy=y_val.copy()
    tsne=TSNE(n_components=2)
    X=np.vstack((X_train_copy,X_val_copy))
    X=tsne.fit_transform(X)
    X_train_copy=X[:len(X_train_copy)]
    X_val_copy=X[len(X_train_copy):]
    mlp=MLPClassifier(random_state=0,early_stopping=True,validation_fraction=0.2)
    param_grid={'activation':["relu","logistic"]}
    grid_search = GridSearchCV(mlp, param_grid, cv=3,refit=True,n_jobs=-1,scoring='accuracy',verbose=1)
    grid_search.fit(X_train_copy,y_train_copy)
    print(grid_search.best_params_)
    print(grid_search.best_score_)
    print("Accuracy: ",accuracy_score(y_val_copy,grid_search.predict(X_val_copy)))
    print(classification_report(y_val_copy,grid_search.predict(X_val_copy)))
    plt.plot(grid_search.best_estimator_.loss_curve_)
    plt.plot(grid_search.best_estimator_.validation_scores_)
    plt.title("Loss Curve and Validation Score")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend(["Loss Curve","Validation Score"])
    plt.show()


In [None]:
gauss_grid_learn(X_train1,y_train1,X_val1,y_val1)

In [None]:
gauss_grid_learn(X_train2,y_train2,X_val2,y_val2)

In [None]:
gauss_grid_learn(X_train3,y_train3,X_val3,y_val3)

In [None]:
gauss_grid_learn(X_train4,y_train4,X_val4,y_val4)

In [None]:
logistic_grid_learn(X_train1,y_train1,X_val1,y_val1)

In [None]:
logistic_grid_learn(X_train2,y_train2,X_val2,y_val2)

In [None]:
logistic_grid_learn(X_train3,y_train3,X_val3,y_val3)

In [None]:
logistic_grid_learn(X_train4,y_train4,X_val4,y_val4)

In [None]:
decision_grid_learn(X_train1,y_train1,X_val1,y_val1)

In [None]:
decision_grid_learn(X_train2,y_train2,X_val2,y_val2)

In [None]:
decision_grid_learn(X_train3,y_train3,X_val3,y_val3)

In [None]:
decision_grid_learn(X_train4,y_train4,X_val4,y_val4)

In [None]:
random_grid_learn(X_train1,y_train1,X_val1,y_val1)

In [None]:
random_grid_learn(X_train2,y_train2,X_val2,y_val2)

In [None]:
random_grid_learn(X_train3,y_train3,X_val3,y_val3)

In [None]:
random_grid_learn(X_train4,y_train4,X_val4,y_val4)

In [None]:
ada_grid_learn(X_train1,y_train1,X_val1,y_val1)

In [None]:
ada_grid_learn(X_train2,y_train2,X_val2,y_val2)

In [None]:
ada_grid_learn(X_train3,y_train3,X_val3,y_val3)

In [None]:
ada_grid_learn(X_train4,y_train4,X_val4,y_val4)

In [None]:
svm_grid_learn(X_train1,y_train1,X_val1,y_val1)

In [None]:
svm_grid_learn(X_train2,y_train2,X_val2,y_val2)

In [None]:
svm_grid_learn(X_train3,y_train3,X_val3,y_val3)

In [None]:
svm_grid_learn(X_train4,y_train4,X_val4,y_val4)

In [None]:
mlp_grid_learn(X_train1,y_train1,X_val1,y_val1)

In [None]:
mlp_grid_learn(X_train2,y_train2,X_val2,y_val2)

In [None]:
mlp_grid_learn(X_train3,y_train3,X_val3,y_val3)

In [None]:
mlp_grid_learn(X_train4,y_train4,X_val4,y_val4)

In [None]:
mlp_pca_grid_learn(X_train1,y_train1,X_val1,y_val1)

In [None]:
mlp_pca_grid_learn(X_train2,y_train2,X_val2,y_val2)

In [None]:
mlp_pca_grid_learn(X_train3,y_train3,X_val3,y_val3)

In [None]:
mlp_pca_grid_learn(X_train4,y_train4,X_val4,y_val4)

In [None]:
mlp_tsne_grid_learn(X_train1,y_train1,X_val1,y_val1)

In [None]:
mlp_tsne_grid_learn(X_train2,y_train2,X_val2,y_val2)

In [None]:
mlp_tsne_grid_learn(X_train3,y_train3,X_val3,y_val3)

In [None]:
mlp_tsne_grid_learn(X_train4,y_train4,X_val4,y_val4)