In [3]:
from sklearn import *
from sklearn.preprocessing import *
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.impute import SimpleImputer
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import *
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


# Understanding the data

In [2]:
# Load the raw measurements; the path is relative to the notebook's working directory.
dataframe = pd.read_csv(filepath_or_buffer='math_outputs.csv')

In [None]:
# Pearson correlation between the numeric columns only.
# `numeric_only=True` is needed on pandas >= 2.0, where `DataFrame.corr` raises
# on non-numeric columns (such as the categorical `chatter_presence`) instead of
# silently dropping them. The matrix is computed once and reused for the plot.
correlation_matrix = dataframe.corr(method="pearson", numeric_only=True)

plt.figure(figsize=(13, 13))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap='Purples')
plt.show()

In [None]:
# Distribution of `freq_sec_highest` for each `chatter_presence` category.
sns.violinplot(
    x='chatter_presence',
    y='freq_sec_highest',
    data=dataframe,
)

# Treating data

In [None]:
# One-hot encode `chatter_presence`; `drop_first=True` omits the first category's column.
# NOTE: this rebinds `dataframe`, so the cell cannot be re-run without reloading the CSV.
dataframe = pd.get_dummies(
    data=dataframe,
    columns=['chatter_presence'],
    drop_first=True,
)
dataframe.head(n=1)


In [None]:
# Force both dummy columns to boolean dtype, then show the resulting column dtypes.
for dummy_column in ("chatter_presence_s", "chatter_presence_i"):
    dataframe[dummy_column] = dataframe[dummy_column].astype('bool')
dataframe.dtypes

In [None]:
# Target: the boolean `chatter_presence_s` dummy column.
y = dataframe['chatter_presence_s']

# Every `chatter_presence_*` dummy is derived from the same source column as the
# target, so keeping any of them (e.g. `chatter_presence_i`) in the feature
# matrix leaks the label into the features. Drop all of them together with
# `Experiment` and `freq_max`.
target_dummies = [col for col in dataframe.columns if col.startswith('chatter_presence_')]
x = dataframe.drop(columns=['Experiment', 'freq_max', *target_dummies])

# 80/20 split with a fixed seed so the partition is reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8, random_state=0)

# Models

In [None]:
# Estimators compared in this notebook. Every estimator with a random component
# is seeded (random_state=0), matching the SVCs, so that Restart & Run All
# reproduces the same scores.
lreg = LinearRegression()
rf = RandomForestRegressor(random_state=0)
dt = DecisionTreeRegressor(random_state=0)
classifier_linear = SVC(kernel='linear', random_state=0)  # SVM, linear kernel
classifier_rbf = SVC(kernel='rbf', random_state=0)  # SVM, RBF kernel

## Linear Regression

In [4]:
# Fit the linear regression on the training split.
lreg.fit(X=x_train, y=y_train)

NameError: name 'lreg' is not defined

In [5]:
# Predictions of the linear regression on the held-out split.
lreg_pred = lreg.predict(X=x_test)

NameError: name 'lreg' is not defined

In [6]:
# Coefficient of determination of the linear regression on the test split.
r2_linear = r2_score(y_true=y_test, y_pred=lreg_pred)
r2_linear

NameError: name 'y_test' is not defined

In [7]:
# Mean absolute error of the linear regression on the test split.
mae_linear = mean_absolute_error(y_true=y_test, y_pred=lreg_pred)
mae_linear

NameError: name 'y_test' is not defined

## Random Forest


In [None]:
# Fit the random forest on the training split.
rf.fit(X=x_train, y=y_train)

In [None]:
# Predictions of the random forest on the held-out split.
rf_pred = rf.predict(X=x_test)

In [None]:
# Coefficient of determination of the random forest on the test split.
r2_rf = r2_score(y_true=y_test, y_pred=rf_pred)

In [None]:
# Mean absolute error of the random forest on the test split.
mae_rf = mean_absolute_error(y_true=y_test, y_pred=rf_pred)

## Decision Tree

In [None]:
# Fit the decision tree on the training split.
dt.fit(X=x_train, y=y_train)

In [8]:
# Predictions of the decision tree on the held-out split.
dt_pred = dt.predict(X=x_test)

NameError: name 'dt' is not defined

In [None]:
# Coefficient of determination of the decision tree on the test split.
r2_dt = r2_score(y_true=y_test, y_pred=dt_pred)

In [None]:
# Mean absolute error of the decision tree on the test split.
mae_dt = mean_absolute_error(y_true=y_test, y_pred=dt_pred)

In [None]:
# Hyper-parameter grid for the decision tree search.
# 'mse' and 'mae' were deprecated in scikit-learn 1.0 and removed in 1.2;
# their current names are 'squared_error' and 'absolute_error'.
params = {
    'criterion': ['squared_error', 'poisson', 'absolute_error'],
    'max_depth': [3, 5, 8, 15]
}

In [None]:
# 5-fold grid search over `params`. The base tree is seeded so the selected
# estimator is reproducible across runs. Note that `dt` is rebound here from a
# plain DecisionTreeRegressor to the GridSearchCV wrapper.
dt = GridSearchCV(DecisionTreeRegressor(random_state=0), params, cv=5)
dt.fit(x_train, y_train)
dt.best_estimator_

In [None]:
# GridSearchCV defaults to refit=True, so `best_estimator_` has already been
# refit on the full training split by `dt.fit(...)`; fitting it again is
# redundant. Display the selected estimator instead.
dt.best_estimator_

In [None]:
# Predictions on the held-out split from the grid search's best estimator.
dt_pred = dt.predict(X=x_test)

In [None]:
# Coefficient of determination of the tuned decision tree on the test split.
r2_dt_best_est = r2_score(y_true=y_test, y_pred=dt_pred)
r2_dt_best_est

In [None]:
# Mean absolute error of the tuned decision tree on the test split.
mae_dt_best_est = mean_absolute_error(y_true=y_test, y_pred=dt_pred)
mae_dt_best_est

## SVM

In [None]:
# Fit the linear-kernel SVM on the training split.
classifier_linear.fit(X=x_train, y=y_train)

In [None]:
# Class predictions of the linear-kernel SVM on the held-out split.
svm_linear_pred = classifier_linear.predict(X=x_test)

In [9]:
# Accuracy of the linear-kernel SVM on the test split.
acc_svm_linear = accuracy_score(y_true=y_test, y_pred=svm_linear_pred)
acc_svm_linear

NameError: name 'y_test' is not defined

In [None]:
# Fit the RBF-kernel SVM on the training split.
classifier_rbf.fit(X=x_train, y=y_train)

In [None]:
# Class predictions of the RBF-kernel SVM on the held-out split.
svm_rbf_pred = classifier_rbf.predict(X=x_test)

In [None]:
# Accuracy of the RBF-kernel SVM on the test split.
acc_svm_rbf = accuracy_score(y_true=y_test, y_pred=svm_rbf_pred)

# Model results

In [14]:
# Regression metrics: one column per regressor, one row per metric.
# The decision tree column reports the grid-searched estimator's scores.
model_results = pd.DataFrame(
    data=np.array([
        [r2_linear, r2_rf, r2_dt_best_est],
        [mae_linear, mae_rf, mae_dt_best_est],
    ]),
    index=['r2_score', 'mean_absolute_error'],
    columns=['linear_regressor', 'random_forest', 'decision_tree'],
)

# Classification accuracy of the two SVM kernels, kept in a separate table.
svm_results = pd.DataFrame(
    data=np.array([[acc_svm_linear, acc_svm_rbf]]),
    index=['accuracy_score'],
    columns=['svm_linear', 'svm_rbf'],
)

NameError: name 'r2_linear' is not defined

In [None]:
# Show the regression metrics rounded to four decimal places.
model_results.round(decimals=4)

In [None]:
# Show the SVM accuracies rounded to four decimal places.
svm_results.round(decimals=4)