In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pandas_profiling import ProfileReport
from statsmodels.formula.api import ols
from sklearn import linear_model
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.feature_selection import SelectKBest, f_regression,mutual_info_regression
from sklearn.feature_selection import RFECV
from sklearn.linear_model import Lasso
from math import sqrt
import pickle as pkl


plt.style.use('ggplot')
pd.set_option('display.max_columns', 500)

In [21]:
features = pkl.load(open('features.pkl','rb'))
m_feature = pkl.load(open('m_feature.pkl','rb'))
p_feature = pkl.load(open('p_feature.pkl','rb'))


# original data set for math class
m_orig = pd.read_csv('MathClassOriginal.csv')
m_orig = m_orig.drop('Unnamed: 0',axis =1)

# original dataset for language arts class
p_orig = pd.read_csv('PortugeseClassOriginal.csv')
p_orig = p_orig.drop('Unnamed: 0',axis =1)

m = pd.read_csv('MathClassCleaned.csv')
m = m.drop(columns='Unnamed: 0')
p = pd.read_csv('PortugeseClassCleaned.csv')
p = p.drop(columns='Unnamed: 0')
p_target=p.G3
p_data = p_feature
p_data = p_data.drop(columns = ['school',"guardian"])

In [16]:
model = ols(data = p, formula='G3~studytime+failures+schoolsup+higher+freetime+Dalc+Walc+health+address_type_Urban+Pstatus_Together+Mjob_health+Mjob_services+Mjob_teacher+Fjob_health+Fjob_services+reason_course+reason_home+reason_reputation').fit()
model.summary()

0,1,2,3
Dep. Variable:,G3,R-squared:,0.31
Model:,OLS,Adj. R-squared:,0.29
Method:,Least Squares,F-statistic:,15.71
Date:,"Sun, 13 Sep 2020",Prob (F-statistic):,4.67e-40
Time:,01:43:53,Log-Likelihood:,-1561.2
No. Observations:,649,AIC:,3160.0
Df Residuals:,630,BIC:,3245.0
Df Model:,18,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,9.9124,0.758,13.078,0.000,8.424,11.401
studytime,0.5108,0.138,3.709,0.000,0.240,0.781
failures,-1.4916,0.195,-7.664,0.000,-1.874,-1.109
schoolsup,-1.0429,0.357,-2.920,0.004,-1.744,-0.341
higher,1.8288,0.378,4.837,0.000,1.086,2.571
freetime,-0.1685,0.105,-1.612,0.107,-0.374,0.037
Dalc,-0.2269,0.151,-1.503,0.133,-0.523,0.070
Walc,-0.1526,0.110,-1.393,0.164,-0.368,0.062
health,-0.1657,0.077,-2.166,0.031,-0.316,-0.015

0,1,2,3
Omnibus:,132.762,Durbin-Watson:,1.867
Prob(Omnibus):,0.0,Jarque-Bera (JB):,427.221
Skew:,-0.96,Prob(JB):,1.7e-93
Kurtosis:,6.481,Cond. No.,49.0


In [24]:
X_train, X_test, y_train, y_test = train_test_split(
    p_data, target, random_state=9,test_size=0.2
)
scaler = StandardScaler()

# fit the scaler to the training data
scaler.fit(X_train)

#transform the training data
X_train = pd.DataFrame(data=scaler.transform(X_train), columns=p_data.columns)

#transform the testing dat
X_test = pd.DataFrame(data=scaler.transform(X_test), columns=p_data.columns)

#instantiate a linear regression object
lm = LinearRegression()

#fit the linear regression to the data
lm = lm.fit(X_train, y_train)

y_train_pred = lm.predict(X_train)

train_rmse = np.sqrt(metrics.mean_squared_error(y_train, y_train_pred))


print('Training Root Mean Squared Error:' , train_rmse)




Training Root Mean Squared Error: 2.691212447987795


In [26]:
selector = SelectKBest(f_regression, k=15)

selector.fit(X_train, y_train)

selected_columns = X_train.columns[selector.get_support()]
removed_columns = X_train.columns[~selector.get_support()]

In [27]:
len(removed_columns)

20

In [28]:
#instantiate a linear regression object
lm_kbest = LinearRegression()

#fit the linear regression to the data
lm_kbest = lm_kbest.fit(X_train[selected_columns], y_train)

y_train_kbest = lm_kbest.predict(X_train[selected_columns])


trainK_rmse = np.sqrt(metrics.mean_squared_error(y_train, y_train_kbest))


print('Training Root Mean Squared Error:' , trainK_rmse)

y_kbest = lm_kbest.predict(X_test[selected_columns])

testK_rmse = np.sqrt(metrics.mean_squared_error(y_test, y_kbest))

print('Testing Root Mean Squared Error:' , testK_rmse)


Training Root Mean Squared Error: 2.769321851114437
Testing Root Mean Squared Error: 2.5818889317860414


In [29]:
#instantiate a linear regression object
lm = LinearRegression()

#fit the linear regression to the data
lm = lm.fit(X_train, y_train)

y_train_pred = lm.predict(X_train)

train_rmse = np.sqrt(metrics.mean_squared_error(y_train, y_train_pred))


print('Training Root Mean Squared Error:' , train_rmse)

Training Root Mean Squared Error: 2.691212447987795


In [30]:
#use fitted model to predict on the test examples
y_test_pred = lm.predict(X_test)

#evaluate the predictions on the test examples
test_rmse = np.sqrt(metrics.mean_squared_error(y_test, y_test_pred))

print('Testing Root Mean Squared Error:' , test_rmse)


print('Training: ', (train_rmse), "vs. Testing: ", (test_rmse))

Testing Root Mean Squared Error: 2.5818025560884137
Training:  2.691212447987795 vs. Testing:  2.5818025560884137


In [32]:
selector = SelectKBest(f_regression, k=20)

selector.fit(X_train, y_train)

selected_columns = X_train.columns[selector.get_support()]
removed_columns = X_train.columns[~selector.get_support()]

In [33]:
list(selected_columns)

['Medu',
 'Fedu',
 'traveltime',
 'studytime',
 'failures',
 'higher',
 'internet',
 'romantic',
 'freetime',
 'Dalc',
 'Walc',
 'health',
 'absences',
 'address_type_Urban',
 'Mjob_at_home',
 'Mjob_teacher',
 'Fjob_teacher',
 'reason_course',
 'reason_reputation',
 'super']

In [34]:
list(removed_columns)

['schoolsup',
 'famsup',
 'activities',
 'nursery',
 'famrel',
 'goout',
 'famsize_BIG_FAMILY',
 'Pstatus_Together',
 'Mjob_health',
 'Mjob_services',
 'Fjob_at_home',
 'Fjob_health',
 'Fjob_services',
 'reason_home',
 'tutor']

In [35]:
#instantiate a linear regression object
lm_kbest = LinearRegression()

#fit the linear regression to the data
lm_kbest = lm_kbest.fit(X_train[selected_columns], y_train)

y_train_kbest = lm_kbest.predict(X_train[selected_columns])


trainK_rmse = np.sqrt(metrics.mean_squared_error(y_train, y_train_kbest))


print('Training Root Mean Squared Error:' , trainK_rmse)

y_kbest = lm_kbest.predict(X_test[selected_columns])

testK_rmse = np.sqrt(metrics.mean_squared_error(y_test, y_kbest))

print('Testing Root Mean Squared Error:' , testK_rmse)




Training Root Mean Squared Error: 2.7493255562626446
Testing Root Mean Squared Error: 2.57145840237804


In [36]:
print('Original: ', test_rmse, '\n',
      "KBest:   ", testK_rmse,'\n')

Original:  2.5818025560884137 
 KBest:    2.57145840237804 



In [40]:
estimator = linear_model.LinearRegression()
# Create recursive feature eliminator that scores features by mean squared errors
selector = RFECV(estimator,step=1, cv=5, scoring='neg_mean_squared_error', n_jobs=-1, verbose=1)

# Fit recursive feature eliminator 
selector.fit(X_train, y_train)


RFECV(cv=5, estimator=LinearRegression(), n_jobs=-1,
      scoring='neg_mean_squared_error', verbose=1)

In [41]:
selected_rfe = X_train.columns[selector.support_]
removed_rfe = X_train.columns[~selector.support_]


In [42]:
list(removed_rfe)

[]

In [43]:
len(list(selected_rfe))

35

In [44]:
#instantiate a linear regression object
lm_rfe = LinearRegression()

#fit the linear regression to the data
lm_rfe = lm_rfe.fit(X_train[selected_rfe], y_train)

y_rfe = lm_rfe.predict(X_train[selected_rfe])


trainRFE_rmse = np.sqrt(metrics.mean_squared_error(y_train, y_rfe))


print('Training Root Mean Squared Error:' , trainRFE_rmse)

y_pred_rfe = lm_rfe.predict(X_test[selected_rfe])

testRFE_rmse = np.sqrt(metrics.mean_squared_error(y_test, y_pred_rfe))

print('Testing Root Mean Squared Error:' , testRFE_rmse)

Training Root Mean Squared Error: 2.691212447987795
Testing Root Mean Squared Error: 2.5818025560884137


In [45]:
print('Original: ', test_rmse, '\n',
      "KBest:   ", testK_rmse,'\n',
      "RFE:     ", testRFE_rmse)

Original:  2.5818025560884137 
 KBest:    2.57145840237804 
 RFE:      2.5818025560884137


In [50]:
poly_2 = PolynomialFeatures(degree=2, include_bias=False)
poly2_data = poly_2.fit_transform(p_data)
poly2_columns = poly_2.get_feature_names(p_data.columns)
df_poly2 = pd.DataFrame(poly2_data, columns=poly2_columns)
df_poly2.head()

Unnamed: 0,Medu,Fedu,traveltime,studytime,failures,schoolsup,famsup,activities,nursery,higher,internet,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,address_type_Urban,famsize_BIG_FAMILY,Pstatus_Together,Mjob_at_home,Mjob_health,Mjob_services,Mjob_teacher,Fjob_at_home,Fjob_health,Fjob_services,Fjob_teacher,reason_course,reason_home,reason_reputation,tutor,super,Medu^2,Medu Fedu,Medu traveltime,Medu studytime,Medu failures,Medu schoolsup,Medu famsup,Medu activities,Medu nursery,Medu higher,Medu internet,Medu romantic,Medu famrel,Medu freetime,Medu goout,Medu Dalc,Medu Walc,Medu health,Medu absences,Medu address_type_Urban,Medu famsize_BIG_FAMILY,Medu Pstatus_Together,Medu Mjob_at_home,Medu Mjob_health,Medu Mjob_services,Medu Mjob_teacher,Medu Fjob_at_home,Medu Fjob_health,Medu Fjob_services,Medu Fjob_teacher,Medu reason_course,Medu reason_home,Medu reason_reputation,Medu tutor,Medu super,Fedu^2,Fedu traveltime,Fedu studytime,Fedu failures,Fedu schoolsup,Fedu famsup,Fedu activities,Fedu nursery,Fedu higher,Fedu internet,Fedu romantic,Fedu famrel,Fedu freetime,Fedu goout,Fedu Dalc,Fedu Walc,Fedu health,Fedu absences,Fedu address_type_Urban,Fedu famsize_BIG_FAMILY,Fedu Pstatus_Together,Fedu Mjob_at_home,Fedu Mjob_health,Fedu Mjob_services,Fedu Mjob_teacher,Fedu Fjob_at_home,Fedu Fjob_health,Fedu Fjob_services,Fedu Fjob_teacher,Fedu reason_course,Fedu reason_home,Fedu reason_reputation,Fedu tutor,Fedu super,traveltime^2,traveltime studytime,traveltime failures,traveltime schoolsup,traveltime famsup,traveltime activities,traveltime nursery,traveltime higher,traveltime internet,traveltime romantic,traveltime famrel,traveltime freetime,traveltime goout,traveltime Dalc,traveltime Walc,traveltime health,traveltime absences,traveltime address_type_Urban,traveltime famsize_BIG_FAMILY,traveltime Pstatus_Together,traveltime Mjob_at_home,traveltime Mjob_health,traveltime Mjob_services,traveltime Mjob_teacher,traveltime Fjob_at_home,traveltime Fjob_health,traveltime Fjob_services,traveltime Fjob_teacher,traveltime reason_course,traveltime reason_home,traveltime reason_reputation,traveltime tutor,traveltime super,studytime^2,studytime failures,studytime schoolsup,studytime famsup,studytime activities,studytime nursery,studytime higher,studytime internet,studytime romantic,studytime famrel,studytime freetime,studytime goout,studytime Dalc,studytime Walc,studytime health,studytime absences,studytime address_type_Urban,studytime famsize_BIG_FAMILY,studytime Pstatus_Together,studytime Mjob_at_home,studytime Mjob_health,studytime Mjob_services,studytime Mjob_teacher,studytime Fjob_at_home,studytime Fjob_health,studytime Fjob_services,studytime Fjob_teacher,studytime reason_course,studytime reason_home,studytime reason_reputation,studytime tutor,studytime super,failures^2,failures schoolsup,failures famsup,failures activities,failures nursery,failures higher,failures internet,failures romantic,failures famrel,failures freetime,failures goout,failures Dalc,failures Walc,failures health,failures absences,failures address_type_Urban,failures famsize_BIG_FAMILY,failures Pstatus_Together,failures Mjob_at_home,failures Mjob_health,failures Mjob_services,failures Mjob_teacher,failures Fjob_at_home,failures Fjob_health,failures Fjob_services,failures Fjob_teacher,failures reason_course,failures reason_home,failures reason_reputation,failures tutor,failures super,schoolsup^2,schoolsup famsup,schoolsup activities,schoolsup nursery,schoolsup higher,schoolsup internet,schoolsup romantic,schoolsup famrel,schoolsup freetime,schoolsup goout,schoolsup Dalc,schoolsup Walc,schoolsup health,schoolsup absences,schoolsup address_type_Urban,schoolsup famsize_BIG_FAMILY,schoolsup Pstatus_Together,schoolsup Mjob_at_home,schoolsup Mjob_health,schoolsup Mjob_services,schoolsup Mjob_teacher,schoolsup Fjob_at_home,schoolsup Fjob_health,schoolsup Fjob_services,schoolsup Fjob_teacher,schoolsup reason_course,schoolsup reason_home,schoolsup reason_reputation,schoolsup tutor,schoolsup super,famsup^2,famsup activities,famsup nursery,famsup higher,famsup internet,famsup romantic,famsup famrel,famsup freetime,famsup goout,famsup Dalc,famsup Walc,famsup health,famsup absences,famsup address_type_Urban,famsup famsize_BIG_FAMILY,famsup Pstatus_Together,famsup Mjob_at_home,famsup Mjob_health,famsup Mjob_services,famsup Mjob_teacher,...,freetime Walc,freetime health,freetime absences,freetime address_type_Urban,freetime famsize_BIG_FAMILY,freetime Pstatus_Together,freetime Mjob_at_home,freetime Mjob_health,freetime Mjob_services,freetime Mjob_teacher,freetime Fjob_at_home,freetime Fjob_health,freetime Fjob_services,freetime Fjob_teacher,freetime reason_course,freetime reason_home,freetime reason_reputation,freetime tutor,freetime super,goout^2,goout Dalc,goout Walc,goout health,goout absences,goout address_type_Urban,goout famsize_BIG_FAMILY,goout Pstatus_Together,goout Mjob_at_home,goout Mjob_health,goout Mjob_services,goout Mjob_teacher,goout Fjob_at_home,goout Fjob_health,goout Fjob_services,goout Fjob_teacher,goout reason_course,goout reason_home,goout reason_reputation,goout tutor,goout super,Dalc^2,Dalc Walc,Dalc health,Dalc absences,Dalc address_type_Urban,Dalc famsize_BIG_FAMILY,Dalc Pstatus_Together,Dalc Mjob_at_home,Dalc Mjob_health,Dalc Mjob_services,Dalc Mjob_teacher,Dalc Fjob_at_home,Dalc Fjob_health,Dalc Fjob_services,Dalc Fjob_teacher,Dalc reason_course,Dalc reason_home,Dalc reason_reputation,Dalc tutor,Dalc super,Walc^2,Walc health,Walc absences,Walc address_type_Urban,Walc famsize_BIG_FAMILY,Walc Pstatus_Together,Walc Mjob_at_home,Walc Mjob_health,Walc Mjob_services,Walc Mjob_teacher,Walc Fjob_at_home,Walc Fjob_health,Walc Fjob_services,Walc Fjob_teacher,Walc reason_course,Walc reason_home,Walc reason_reputation,Walc tutor,Walc super,health^2,health absences,health address_type_Urban,health famsize_BIG_FAMILY,health Pstatus_Together,health Mjob_at_home,health Mjob_health,health Mjob_services,health Mjob_teacher,health Fjob_at_home,health Fjob_health,health Fjob_services,health Fjob_teacher,health reason_course,health reason_home,health reason_reputation,health tutor,health super,absences^2,absences address_type_Urban,absences famsize_BIG_FAMILY,absences Pstatus_Together,absences Mjob_at_home,absences Mjob_health,absences Mjob_services,absences Mjob_teacher,absences Fjob_at_home,absences Fjob_health,absences Fjob_services,absences Fjob_teacher,absences reason_course,absences reason_home,absences reason_reputation,absences tutor,absences super,address_type_Urban^2,address_type_Urban famsize_BIG_FAMILY,address_type_Urban Pstatus_Together,address_type_Urban Mjob_at_home,address_type_Urban Mjob_health,address_type_Urban Mjob_services,address_type_Urban Mjob_teacher,address_type_Urban Fjob_at_home,address_type_Urban Fjob_health,address_type_Urban Fjob_services,address_type_Urban Fjob_teacher,address_type_Urban reason_course,address_type_Urban reason_home,address_type_Urban reason_reputation,address_type_Urban tutor,address_type_Urban super,famsize_BIG_FAMILY^2,famsize_BIG_FAMILY Pstatus_Together,famsize_BIG_FAMILY Mjob_at_home,famsize_BIG_FAMILY Mjob_health,famsize_BIG_FAMILY Mjob_services,famsize_BIG_FAMILY Mjob_teacher,famsize_BIG_FAMILY Fjob_at_home,famsize_BIG_FAMILY Fjob_health,famsize_BIG_FAMILY Fjob_services,famsize_BIG_FAMILY Fjob_teacher,famsize_BIG_FAMILY reason_course,famsize_BIG_FAMILY reason_home,famsize_BIG_FAMILY reason_reputation,famsize_BIG_FAMILY tutor,famsize_BIG_FAMILY super,Pstatus_Together^2,Pstatus_Together Mjob_at_home,Pstatus_Together Mjob_health,Pstatus_Together Mjob_services,Pstatus_Together Mjob_teacher,Pstatus_Together Fjob_at_home,Pstatus_Together Fjob_health,Pstatus_Together Fjob_services,Pstatus_Together Fjob_teacher,Pstatus_Together reason_course,Pstatus_Together reason_home,Pstatus_Together reason_reputation,Pstatus_Together tutor,Pstatus_Together super,Mjob_at_home^2,Mjob_at_home Mjob_health,Mjob_at_home Mjob_services,Mjob_at_home Mjob_teacher,Mjob_at_home Fjob_at_home,Mjob_at_home Fjob_health,Mjob_at_home Fjob_services,Mjob_at_home Fjob_teacher,Mjob_at_home reason_course,Mjob_at_home reason_home,Mjob_at_home reason_reputation,Mjob_at_home tutor,Mjob_at_home super,Mjob_health^2,Mjob_health Mjob_services,Mjob_health Mjob_teacher,Mjob_health Fjob_at_home,Mjob_health Fjob_health,Mjob_health Fjob_services,Mjob_health Fjob_teacher,Mjob_health reason_course,Mjob_health reason_home,Mjob_health reason_reputation,Mjob_health tutor,Mjob_health super,Mjob_services^2,Mjob_services Mjob_teacher,Mjob_services Fjob_at_home,Mjob_services Fjob_health,Mjob_services Fjob_services,Mjob_services Fjob_teacher,Mjob_services reason_course,Mjob_services reason_home,Mjob_services reason_reputation,Mjob_services tutor,Mjob_services super,Mjob_teacher^2,Mjob_teacher Fjob_at_home,Mjob_teacher Fjob_health,Mjob_teacher Fjob_services,Mjob_teacher Fjob_teacher,Mjob_teacher reason_course,Mjob_teacher reason_home,Mjob_teacher reason_reputation,Mjob_teacher tutor,Mjob_teacher super,Fjob_at_home^2,Fjob_at_home Fjob_health,Fjob_at_home Fjob_services,Fjob_at_home Fjob_teacher,Fjob_at_home reason_course,Fjob_at_home reason_home,Fjob_at_home reason_reputation,Fjob_at_home tutor,Fjob_at_home super,Fjob_health^2,Fjob_health Fjob_services,Fjob_health Fjob_teacher,Fjob_health reason_course,Fjob_health reason_home,Fjob_health reason_reputation,Fjob_health tutor,Fjob_health super,Fjob_services^2,Fjob_services Fjob_teacher,Fjob_services reason_course,Fjob_services reason_home,Fjob_services reason_reputation,Fjob_services tutor,Fjob_services super,Fjob_teacher^2,Fjob_teacher reason_course,Fjob_teacher reason_home,Fjob_teacher reason_reputation,Fjob_teacher tutor,Fjob_teacher super,reason_course^2,reason_course reason_home,reason_course reason_reputation,reason_course tutor,reason_course super,reason_home^2,reason_home reason_reputation,reason_home tutor,reason_home super,reason_reputation^2,reason_reputation tutor,reason_reputation super,tutor^2,tutor super,super^2
0,4.0,4.0,2.0,2.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,4.0,3.0,4.0,1.0,1.0,3.0,4.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,16.0,16.0,16.0,8.0,8.0,0.0,4.0,0.0,0.0,4.0,4.0,0.0,0.0,16.0,12.0,16.0,4.0,4.0,12.0,16.0,4.0,4.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,64.0,16.0,8.0,8.0,0.0,4.0,0.0,0.0,4.0,4.0,0.0,0.0,16.0,12.0,16.0,4.0,4.0,12.0,16.0,4.0,4.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,64.0,4.0,4.0,0.0,2.0,0.0,0.0,2.0,2.0,0.0,0.0,8.0,6.0,8.0,2.0,2.0,6.0,8.0,2.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,32.0,4.0,0.0,2.0,0.0,0.0,2.0,2.0,0.0,0.0,8.0,6.0,8.0,2.0,2.0,6.0,8.0,2.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,4.0,3.0,4.0,1.0,1.0,3.0,4.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3.0,9.0,12.0,3.0,3.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,48.0,16.0,4.0,4.0,12.0,16.0,4.0,4.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,64.0,1.0,1.0,3.0,4.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,16.0,1.0,3.0,4.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,16.0,9.0,12.0,3.0,3.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,0.0,0.0,0.0,48.0,16.0,4.0,4.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,64.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,16.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,16.0,1.0,0.0,0.0,0.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,256.0
1,1.0,1.0,1.0,2.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,5.0,3.0,3.0,1.0,1.0,3.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,15.5,1.0,1.0,1.0,2.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,5.0,3.0,3.0,1.0,1.0,3.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,15.5,1.0,1.0,2.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,5.0,3.0,3.0,1.0,1.0,3.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,15.5,1.0,2.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,5.0,3.0,3.0,1.0,1.0,3.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,15.5,4.0,0.0,0.0,2.0,0.0,0.0,2.0,2.0,0.0,10.0,6.0,6.0,2.0,2.0,6.0,4.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,31.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,5.0,3.0,3.0,1.0,1.0,3.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,...,3.0,9.0,6.0,3.0,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,46.5,9.0,3.0,3.0,9.0,6.0,3.0,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,46.5,1.0,1.0,3.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,15.5,1.0,3.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,15.5,9.0,6.0,3.0,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,46.5,4.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,31.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,15.5,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,15.5,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,15.5,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,15.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,15.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,240.25
2,1.0,1.0,1.0,2.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,4.0,3.0,2.0,2.0,3.0,3.0,6.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.5,1.0,1.0,1.0,2.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,4.0,3.0,2.0,2.0,3.0,3.0,6.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.5,1.0,1.0,2.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,4.0,3.0,2.0,2.0,3.0,3.0,6.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.5,1.0,2.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,4.0,3.0,2.0,2.0,3.0,3.0,6.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.5,4.0,0.0,2.0,0.0,0.0,2.0,2.0,2.0,0.0,8.0,6.0,4.0,4.0,6.0,6.0,12.0,2.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,4.0,3.0,2.0,2.0,3.0,3.0,6.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,9.0,9.0,18.0,3.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.5,4.0,4.0,6.0,6.0,12.0,2.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,4.0,6.0,6.0,12.0,2.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,9.0,9.0,18.0,3.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.5,9.0,18.0,3.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.5,36.0,6.0,0.0,6.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,45.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.5,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,56.25
3,4.0,2.0,1.0,3.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,2.0,2.0,1.0,1.0,5.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,26.5,16.0,8.0,4.0,12.0,0.0,0.0,4.0,4.0,4.0,4.0,4.0,4.0,12.0,8.0,8.0,4.0,4.0,20.0,0.0,4.0,4.0,4.0,0.0,4.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,106.0,4.0,2.0,6.0,0.0,0.0,2.0,2.0,2.0,2.0,2.0,2.0,6.0,4.0,4.0,2.0,2.0,10.0,0.0,2.0,2.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,2.0,0.0,0.0,53.0,1.0,3.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,2.0,2.0,1.0,1.0,5.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,26.5,9.0,0.0,0.0,3.0,3.0,3.0,3.0,3.0,3.0,9.0,6.0,6.0,3.0,3.0,15.0,0.0,3.0,3.0,3.0,0.0,3.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,3.0,0.0,0.0,79.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,2.0,2.0,1.0,1.0,5.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,...,2.0,10.0,0.0,2.0,2.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,2.0,0.0,0.0,53.0,4.0,2.0,2.0,10.0,0.0,2.0,2.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,2.0,0.0,0.0,53.0,1.0,1.0,5.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,26.5,1.0,5.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,26.5,25.0,0.0,5.0,5.0,5.0,0.0,5.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,5.0,0.0,0.0,132.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,26.5,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,26.5,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,26.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,26.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,26.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,26.5,0.0,0.0,0.0,0.0,0.0,702.25
4,3.0,3.0,1.0,2.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,4.0,3.0,2.0,1.0,2.0,5.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,21.5,9.0,9.0,3.0,6.0,0.0,0.0,3.0,0.0,3.0,3.0,0.0,0.0,12.0,9.0,6.0,3.0,6.0,15.0,0.0,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,64.5,9.0,3.0,6.0,0.0,0.0,3.0,0.0,3.0,3.0,0.0,0.0,12.0,9.0,6.0,3.0,6.0,15.0,0.0,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,64.5,1.0,2.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,4.0,3.0,2.0,1.0,2.0,5.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,21.5,4.0,0.0,0.0,2.0,0.0,2.0,2.0,0.0,0.0,8.0,6.0,4.0,2.0,4.0,10.0,0.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,43.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,4.0,3.0,2.0,1.0,2.0,5.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,...,6.0,15.0,0.0,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,64.5,4.0,2.0,4.0,10.0,0.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,43.0,1.0,2.0,5.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,21.5,4.0,10.0,0.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,43.0,25.0,0.0,5.0,5.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,107.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,21.5,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,21.5,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,21.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,21.5,0.0,0.0,0.0,0.0,0.0,462.25


In [51]:
X_train, X_test, y_train, y_test = train_test_split(
    df_poly2, target, random_state=9,test_size=0.2
         )
scaler2 = StandardScaler()
# fit the scaler to the training data
scaler2.fit(X_train)

#transform the training data
X_train = pd.DataFrame(data=scaler2.transform(X_train), columns=df_poly2.columns)

#transform the testing dat
X_test = pd.DataFrame(data=scaler2.transform(X_test), columns=df_poly2.columns)


In [52]:
# instantiate a linear regression object
lm_2 = LinearRegression()

#fit the linear regression to the data
lm_2 = lm_2.fit(X_train, y_train)

y_train_pred = lm_2.predict(X_train)

train_rmse = np.sqrt(metrics.mean_squared_error(y_train, y_train_pred))


print('Training Root Mean Squared Error:' , train_rmse)

Training Root Mean Squared Error: 8.342648506431632e-14


In [53]:
# use fitted model to predict on test data
y_pred = lm_2.predict(X_test)

test_rmse = np.sqrt(metrics.mean_squared_error(y_test, y_pred))

print('Testing Root Mean Squared Error:' , test_rmse)


print('Training: ', int(train_rmse), "vs. Testing: ", int(test_rmse))

Testing Root Mean Squared Error: 22.694844375509433
Training:  0 vs. Testing:  22


In [54]:
selector = SelectKBest(f_regression, k=80)

selector.fit(X_train, y_train)

  corr /= X_norms
  cond2 = cond0 & (x <= _a)


SelectKBest(k=80, score_func=<function f_regression at 0x7fbca0b4e9d0>)

In [55]:
selected_columns = X_train.columns[selector.get_support()]
removed_columns = X_train.columns[~selector.get_support()]

In [56]:
list(selected_columns)

['Medu',
 'Fedu',
 'studytime',
 'failures',
 'higher',
 'Dalc',
 'Walc',
 'super',
 'Medu^2',
 'Medu Fedu',
 'Medu studytime',
 'Medu failures',
 'Medu higher',
 'Medu internet',
 'Medu address_type_Urban',
 'Medu super',
 'Fedu^2',
 'Fedu studytime',
 'Fedu failures',
 'Fedu higher',
 'Fedu internet',
 'Fedu famrel',
 'Fedu address_type_Urban',
 'Fedu reason_reputation',
 'Fedu super',
 'traveltime failures',
 'studytime^2',
 'studytime failures',
 'studytime higher',
 'studytime internet',
 'studytime famrel',
 'studytime address_type_Urban',
 'studytime reason_reputation',
 'studytime super',
 'failures^2',
 'failures famsup',
 'failures activities',
 'failures nursery',
 'failures higher',
 'failures internet',
 'failures romantic',
 'failures famrel',
 'failures freetime',
 'failures goout',
 'failures Dalc',
 'failures Walc',
 'failures health',
 'failures absences',
 'failures address_type_Urban',
 'failures famsize_BIG_FAMILY',
 'failures Pstatus_Together',
 'failures Mjob_at_

In [57]:
#instantiate a linear regression object
lm_kbest = LinearRegression()

#fit the linear regression to the data
lm_kbest = lm_kbest.fit(X_train[selected_columns], y_train)

y_train_kbest = lm_kbest.predict(X_train[selected_columns])


trainK_rmse = np.sqrt(metrics.mean_squared_error(y_train, y_train_kbest))


print('Training Root Mean Squared Error:' , trainK_rmse)

y_kbest = lm_kbest.predict(X_test[selected_columns])

testK_rmse = np.sqrt(metrics.mean_squared_error(y_test, y_kbest))

print('Testing Root Mean Squared Error:' , testK_rmse)




Training Root Mean Squared Error: 2.911824433765815
Testing Root Mean Squared Error: 2.897913109569523


In [84]:
poly_5 = PolynomialFeatures(degree=5, include_bias=False)
poly5_data = poly_5.fit_transform(p_data)
poly5_columns = poly_5.get_feature_names(p_data.columns)
df_poly5 = pd.DataFrame(poly5_data, columns=poly5_columns)
df_poly5.columns = df_poly5.columns.str.replace(' ', '')
df_poly5.columns = df_poly5.columns.str.replace('^', '')
df_poly5.head()

Unnamed: 0,Medu,Fedu,traveltime,studytime,failures,schoolsup,famsup,activities,nursery,higher,internet,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,address_type_Urban,famsize_BIG_FAMILY,Pstatus_Together,Mjob_at_home,Mjob_health,Mjob_services,Mjob_teacher,Fjob_at_home,Fjob_health,Fjob_services,Fjob_teacher,reason_course,reason_home,reason_reputation,tutor,super,Medu2,MeduFedu,Medutraveltime,Medustudytime,Medufailures,Meduschoolsup,Medufamsup,Meduactivities,Medunursery,Meduhigher,Meduinternet,Meduromantic,Medufamrel,Medufreetime,Medugoout,MeduDalc,MeduWalc,Meduhealth,Meduabsences,Meduaddress_type_Urban,Medufamsize_BIG_FAMILY,MeduPstatus_Together,MeduMjob_at_home,MeduMjob_health,MeduMjob_services,MeduMjob_teacher,MeduFjob_at_home,MeduFjob_health,MeduFjob_services,MeduFjob_teacher,Medureason_course,Medureason_home,Medureason_reputation,Medututor,Medusuper,Fedu2,Fedutraveltime,Fedustudytime,Fedufailures,Feduschoolsup,Fedufamsup,Feduactivities,Fedunursery,Feduhigher,Feduinternet,Feduromantic,Fedufamrel,Fedufreetime,Fedugoout,FeduDalc,FeduWalc,Feduhealth,Feduabsences,Feduaddress_type_Urban,Fedufamsize_BIG_FAMILY,FeduPstatus_Together,FeduMjob_at_home,FeduMjob_health,FeduMjob_services,FeduMjob_teacher,FeduFjob_at_home,FeduFjob_health,FeduFjob_services,FeduFjob_teacher,Fedureason_course,Fedureason_home,Fedureason_reputation,Fedututor,Fedusuper,traveltime2,traveltimestudytime,traveltimefailures,traveltimeschoolsup,traveltimefamsup,traveltimeactivities,traveltimenursery,traveltimehigher,traveltimeinternet,traveltimeromantic,traveltimefamrel,traveltimefreetime,traveltimegoout,traveltimeDalc,traveltimeWalc,traveltimehealth,traveltimeabsences,traveltimeaddress_type_Urban,traveltimefamsize_BIG_FAMILY,traveltimePstatus_Together,traveltimeMjob_at_home,traveltimeMjob_health,traveltimeMjob_services,traveltimeMjob_teacher,traveltimeFjob_at_home,traveltimeFjob_health,traveltimeFjob_services,traveltimeFjob_teacher,traveltimereason_course,traveltimereason_home,traveltimereason_reputation,traveltimetutor,traveltimesuper,studytime2,studytimefailures,studytimeschoolsup,studytimefamsup,studytimeactivities,studytimenursery,studytimehigher,studytimeinternet,studytimeromantic,studytimefamrel,studytimefreetime,studytimegoout,studytimeDalc,studytimeWalc,studytimehealth,studytimeabsences,studytimeaddress_type_Urban,studytimefamsize_BIG_FAMILY,studytimePstatus_Together,studytimeMjob_at_home,studytimeMjob_health,studytimeMjob_services,studytimeMjob_teacher,studytimeFjob_at_home,studytimeFjob_health,studytimeFjob_services,studytimeFjob_teacher,studytimereason_course,studytimereason_home,studytimereason_reputation,studytimetutor,studytimesuper,failures2,failuresschoolsup,failuresfamsup,failuresactivities,failuresnursery,failureshigher,failuresinternet,failuresromantic,failuresfamrel,failuresfreetime,failuresgoout,failuresDalc,failuresWalc,failureshealth,failuresabsences,failuresaddress_type_Urban,failuresfamsize_BIG_FAMILY,failuresPstatus_Together,failuresMjob_at_home,failuresMjob_health,failuresMjob_services,failuresMjob_teacher,failuresFjob_at_home,failuresFjob_health,failuresFjob_services,failuresFjob_teacher,failuresreason_course,failuresreason_home,failuresreason_reputation,failurestutor,failuressuper,schoolsup2,schoolsupfamsup,schoolsupactivities,schoolsupnursery,schoolsuphigher,schoolsupinternet,schoolsupromantic,schoolsupfamrel,schoolsupfreetime,schoolsupgoout,schoolsupDalc,schoolsupWalc,schoolsuphealth,schoolsupabsences,schoolsupaddress_type_Urban,schoolsupfamsize_BIG_FAMILY,schoolsupPstatus_Together,schoolsupMjob_at_home,schoolsupMjob_health,schoolsupMjob_services,schoolsupMjob_teacher,schoolsupFjob_at_home,schoolsupFjob_health,schoolsupFjob_services,schoolsupFjob_teacher,schoolsupreason_course,schoolsupreason_home,schoolsupreason_reputation,schoolsuptutor,schoolsupsuper,famsup2,famsupactivities,famsupnursery,famsuphigher,famsupinternet,famsupromantic,famsupfamrel,famsupfreetime,famsupgoout,famsupDalc,famsupWalc,famsuphealth,famsupabsences,famsupaddress_type_Urban,famsupfamsize_BIG_FAMILY,famsupPstatus_Together,famsupMjob_at_home,famsupMjob_health,famsupMjob_services,famsupMjob_teacher,...,Fjob_teacher4reason_home,Fjob_teacher4reason_reputation,Fjob_teacher4tutor,Fjob_teacher4super,Fjob_teacher3reason_course2,Fjob_teacher3reason_coursereason_home,Fjob_teacher3reason_coursereason_reputation,Fjob_teacher3reason_coursetutor,Fjob_teacher3reason_coursesuper,Fjob_teacher3reason_home2,Fjob_teacher3reason_homereason_reputation,Fjob_teacher3reason_hometutor,Fjob_teacher3reason_homesuper,Fjob_teacher3reason_reputation2,Fjob_teacher3reason_reputationtutor,Fjob_teacher3reason_reputationsuper,Fjob_teacher3tutor2,Fjob_teacher3tutorsuper,Fjob_teacher3super2,Fjob_teacher2reason_course3,Fjob_teacher2reason_course2reason_home,Fjob_teacher2reason_course2reason_reputation,Fjob_teacher2reason_course2tutor,Fjob_teacher2reason_course2super,Fjob_teacher2reason_coursereason_home2,Fjob_teacher2reason_coursereason_homereason_reputation,Fjob_teacher2reason_coursereason_hometutor,Fjob_teacher2reason_coursereason_homesuper,Fjob_teacher2reason_coursereason_reputation2,Fjob_teacher2reason_coursereason_reputationtutor,Fjob_teacher2reason_coursereason_reputationsuper,Fjob_teacher2reason_coursetutor2,Fjob_teacher2reason_coursetutorsuper,Fjob_teacher2reason_coursesuper2,Fjob_teacher2reason_home3,Fjob_teacher2reason_home2reason_reputation,Fjob_teacher2reason_home2tutor,Fjob_teacher2reason_home2super,Fjob_teacher2reason_homereason_reputation2,Fjob_teacher2reason_homereason_reputationtutor,Fjob_teacher2reason_homereason_reputationsuper,Fjob_teacher2reason_hometutor2,Fjob_teacher2reason_hometutorsuper,Fjob_teacher2reason_homesuper2,Fjob_teacher2reason_reputation3,Fjob_teacher2reason_reputation2tutor,Fjob_teacher2reason_reputation2super,Fjob_teacher2reason_reputationtutor2,Fjob_teacher2reason_reputationtutorsuper,Fjob_teacher2reason_reputationsuper2,Fjob_teacher2tutor3,Fjob_teacher2tutor2super,Fjob_teacher2tutorsuper2,Fjob_teacher2super3,Fjob_teacherreason_course4,Fjob_teacherreason_course3reason_home,Fjob_teacherreason_course3reason_reputation,Fjob_teacherreason_course3tutor,Fjob_teacherreason_course3super,Fjob_teacherreason_course2reason_home2,Fjob_teacherreason_course2reason_homereason_reputation,Fjob_teacherreason_course2reason_hometutor,Fjob_teacherreason_course2reason_homesuper,Fjob_teacherreason_course2reason_reputation2,Fjob_teacherreason_course2reason_reputationtutor,Fjob_teacherreason_course2reason_reputationsuper,Fjob_teacherreason_course2tutor2,Fjob_teacherreason_course2tutorsuper,Fjob_teacherreason_course2super2,Fjob_teacherreason_coursereason_home3,Fjob_teacherreason_coursereason_home2reason_reputation,Fjob_teacherreason_coursereason_home2tutor,Fjob_teacherreason_coursereason_home2super,Fjob_teacherreason_coursereason_homereason_reputation2,Fjob_teacherreason_coursereason_homereason_reputationtutor,Fjob_teacherreason_coursereason_homereason_reputationsuper,Fjob_teacherreason_coursereason_hometutor2,Fjob_teacherreason_coursereason_hometutorsuper,Fjob_teacherreason_coursereason_homesuper2,Fjob_teacherreason_coursereason_reputation3,Fjob_teacherreason_coursereason_reputation2tutor,Fjob_teacherreason_coursereason_reputation2super,Fjob_teacherreason_coursereason_reputationtutor2,Fjob_teacherreason_coursereason_reputationtutorsuper,Fjob_teacherreason_coursereason_reputationsuper2,Fjob_teacherreason_coursetutor3,Fjob_teacherreason_coursetutor2super,Fjob_teacherreason_coursetutorsuper2,Fjob_teacherreason_coursesuper3,Fjob_teacherreason_home4,Fjob_teacherreason_home3reason_reputation,Fjob_teacherreason_home3tutor,Fjob_teacherreason_home3super,Fjob_teacherreason_home2reason_reputation2,Fjob_teacherreason_home2reason_reputationtutor,Fjob_teacherreason_home2reason_reputationsuper,Fjob_teacherreason_home2tutor2,Fjob_teacherreason_home2tutorsuper,Fjob_teacherreason_home2super2,Fjob_teacherreason_homereason_reputation3,Fjob_teacherreason_homereason_reputation2tutor,Fjob_teacherreason_homereason_reputation2super,Fjob_teacherreason_homereason_reputationtutor2,Fjob_teacherreason_homereason_reputationtutorsuper,Fjob_teacherreason_homereason_reputationsuper2,Fjob_teacherreason_hometutor3,Fjob_teacherreason_hometutor2super,Fjob_teacherreason_hometutorsuper2,Fjob_teacherreason_homesuper3,Fjob_teacherreason_reputation4,Fjob_teacherreason_reputation3tutor,Fjob_teacherreason_reputation3super,Fjob_teacherreason_reputation2tutor2,Fjob_teacherreason_reputation2tutorsuper,Fjob_teacherreason_reputation2super2,Fjob_teacherreason_reputationtutor3,Fjob_teacherreason_reputationtutor2super,Fjob_teacherreason_reputationtutorsuper2,Fjob_teacherreason_reputationsuper3,Fjob_teachertutor4,Fjob_teachertutor3super,Fjob_teachertutor2super2,Fjob_teachertutorsuper3,Fjob_teachersuper4,reason_course5,reason_course4reason_home,reason_course4reason_reputation,reason_course4tutor,reason_course4super,reason_course3reason_home2,reason_course3reason_homereason_reputation,reason_course3reason_hometutor,reason_course3reason_homesuper,reason_course3reason_reputation2,reason_course3reason_reputationtutor,reason_course3reason_reputationsuper,reason_course3tutor2,reason_course3tutorsuper,reason_course3super2,reason_course2reason_home3,reason_course2reason_home2reason_reputation,reason_course2reason_home2tutor,reason_course2reason_home2super,reason_course2reason_homereason_reputation2,reason_course2reason_homereason_reputationtutor,reason_course2reason_homereason_reputationsuper,reason_course2reason_hometutor2,reason_course2reason_hometutorsuper,reason_course2reason_homesuper2,reason_course2reason_reputation3,reason_course2reason_reputation2tutor,reason_course2reason_reputation2super,reason_course2reason_reputationtutor2,reason_course2reason_reputationtutorsuper,reason_course2reason_reputationsuper2,reason_course2tutor3,reason_course2tutor2super,reason_course2tutorsuper2,reason_course2super3,reason_coursereason_home4,reason_coursereason_home3reason_reputation,reason_coursereason_home3tutor,reason_coursereason_home3super,reason_coursereason_home2reason_reputation2,reason_coursereason_home2reason_reputationtutor,reason_coursereason_home2reason_reputationsuper,reason_coursereason_home2tutor2,reason_coursereason_home2tutorsuper,reason_coursereason_home2super2,reason_coursereason_homereason_reputation3,reason_coursereason_homereason_reputation2tutor,reason_coursereason_homereason_reputation2super,reason_coursereason_homereason_reputationtutor2,reason_coursereason_homereason_reputationtutorsuper,reason_coursereason_homereason_reputationsuper2,reason_coursereason_hometutor3,reason_coursereason_hometutor2super,reason_coursereason_hometutorsuper2,reason_coursereason_homesuper3,reason_coursereason_reputation4,reason_coursereason_reputation3tutor,reason_coursereason_reputation3super,reason_coursereason_reputation2tutor2,reason_coursereason_reputation2tutorsuper,reason_coursereason_reputation2super2,reason_coursereason_reputationtutor3,reason_coursereason_reputationtutor2super,reason_coursereason_reputationtutorsuper2,reason_coursereason_reputationsuper3,reason_coursetutor4,reason_coursetutor3super,reason_coursetutor2super2,reason_coursetutorsuper3,reason_coursesuper4,reason_home5,reason_home4reason_reputation,reason_home4tutor,reason_home4super,reason_home3reason_reputation2,reason_home3reason_reputationtutor,reason_home3reason_reputationsuper,reason_home3tutor2,reason_home3tutorsuper,reason_home3super2,reason_home2reason_reputation3,reason_home2reason_reputation2tutor,reason_home2reason_reputation2super,reason_home2reason_reputationtutor2,reason_home2reason_reputationtutorsuper,reason_home2reason_reputationsuper2,reason_home2tutor3,reason_home2tutor2super,reason_home2tutorsuper2,reason_home2super3,reason_homereason_reputation4,reason_homereason_reputation3tutor,reason_homereason_reputation3super,reason_homereason_reputation2tutor2,reason_homereason_reputation2tutorsuper,reason_homereason_reputation2super2,reason_homereason_reputationtutor3,reason_homereason_reputationtutor2super,reason_homereason_reputationtutorsuper2,reason_homereason_reputationsuper3,reason_hometutor4,reason_hometutor3super,reason_hometutor2super2,reason_hometutorsuper3,reason_homesuper4,reason_reputation5,reason_reputation4tutor,reason_reputation4super,reason_reputation3tutor2,reason_reputation3tutorsuper,reason_reputation3super2,reason_reputation2tutor3,reason_reputation2tutor2super,reason_reputation2tutorsuper2,reason_reputation2super3,reason_reputationtutor4,reason_reputationtutor3super,reason_reputationtutor2super2,reason_reputationtutorsuper3,reason_reputationsuper4,tutor5,tutor4super,tutor3super2,tutor2super3,tutorsuper4,super5
0,4.0,4.0,2.0,2.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,4.0,3.0,4.0,1.0,1.0,3.0,4.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,16.0,16.0,16.0,8.0,8.0,0.0,4.0,0.0,0.0,4.0,4.0,0.0,0.0,16.0,12.0,16.0,4.0,4.0,12.0,16.0,4.0,4.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,64.0,16.0,8.0,8.0,0.0,4.0,0.0,0.0,4.0,4.0,0.0,0.0,16.0,12.0,16.0,4.0,4.0,12.0,16.0,4.0,4.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,64.0,4.0,4.0,0.0,2.0,0.0,0.0,2.0,2.0,0.0,0.0,8.0,6.0,8.0,2.0,2.0,6.0,8.0,2.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,32.0,4.0,0.0,2.0,0.0,0.0,2.0,2.0,0.0,0.0,8.0,6.0,8.0,2.0,2.0,6.0,8.0,2.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,4.0,3.0,4.0,1.0,1.0,3.0,4.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,16.0,1.0,0.0,0.0,0.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,256.0,1.0,0.0,0.0,0.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,256.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4096.0,1.0,0.0,0.0,0.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,256.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4096.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,65536.0,1.0,0.0,0.0,0.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,256.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4096.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,65536.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1048576.0
1,1.0,1.0,1.0,2.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,5.0,3.0,3.0,1.0,1.0,3.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,15.5,1.0,1.0,1.0,2.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,5.0,3.0,3.0,1.0,1.0,3.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,15.5,1.0,1.0,2.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,5.0,3.0,3.0,1.0,1.0,3.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,15.5,1.0,2.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,5.0,3.0,3.0,1.0,1.0,3.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,15.5,4.0,0.0,0.0,2.0,0.0,0.0,2.0,2.0,0.0,10.0,6.0,6.0,2.0,2.0,6.0,4.0,2.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,31.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,5.0,3.0,3.0,1.0,1.0,3.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,15.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,240.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3723.875,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,57720.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,894661.0
2,1.0,1.0,1.0,2.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,4.0,3.0,2.0,2.0,3.0,3.0,6.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.5,1.0,1.0,1.0,2.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,4.0,3.0,2.0,2.0,3.0,3.0,6.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.5,1.0,1.0,2.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,4.0,3.0,2.0,2.0,3.0,3.0,6.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.5,1.0,2.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,4.0,3.0,2.0,2.0,3.0,3.0,6.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.5,4.0,0.0,2.0,0.0,0.0,2.0,2.0,2.0,0.0,8.0,6.0,4.0,4.0,6.0,6.0,12.0,2.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,4.0,3.0,2.0,2.0,3.0,3.0,6.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,23730.47
3,4.0,2.0,1.0,3.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,2.0,2.0,1.0,1.0,5.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,26.5,16.0,8.0,4.0,12.0,0.0,0.0,4.0,4.0,4.0,4.0,4.0,4.0,12.0,8.0,8.0,4.0,4.0,20.0,0.0,4.0,4.0,4.0,0.0,4.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,106.0,4.0,2.0,6.0,0.0,0.0,2.0,2.0,2.0,2.0,2.0,2.0,6.0,4.0,4.0,2.0,2.0,10.0,0.0,2.0,2.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,2.0,0.0,0.0,53.0,1.0,3.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,2.0,2.0,1.0,1.0,5.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,26.5,9.0,0.0,0.0,3.0,3.0,3.0,3.0,3.0,3.0,9.0,6.0,6.0,3.0,3.0,15.0,0.0,3.0,3.0,3.0,0.0,3.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,3.0,0.0,0.0,79.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,2.0,2.0,1.0,1.0,5.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,26.5,0.0,0.0,0.0,0.0,0.0,702.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18609.625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,493155.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13068610.0
4,3.0,3.0,1.0,2.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,4.0,3.0,2.0,1.0,2.0,5.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,21.5,9.0,9.0,3.0,6.0,0.0,0.0,3.0,0.0,3.0,3.0,0.0,0.0,12.0,9.0,6.0,3.0,6.0,15.0,0.0,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,64.5,9.0,3.0,6.0,0.0,0.0,3.0,0.0,3.0,3.0,0.0,0.0,12.0,9.0,6.0,3.0,6.0,15.0,0.0,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,64.5,1.0,2.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,4.0,3.0,2.0,1.0,2.0,5.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,21.5,4.0,0.0,0.0,2.0,0.0,2.0,2.0,0.0,0.0,8.0,6.0,4.0,2.0,4.0,10.0,0.0,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,43.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,4.0,3.0,2.0,1.0,2.0,5.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,21.5,0.0,0.0,0.0,0.0,0.0,462.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9938.375,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,213675.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4594014.0


In [91]:
X_train, X_test, y_train, y_test = train_test_split(
    df_poly5, target, random_state=9,test_size=0.2
         )
scaler2 = StandardScaler()
# fit the scaler to the training data
scaler2.fit(X_train)

#transform the training data
X_train = pd.DataFrame(data=scaler2.transform(X_train), columns=df_poly5.columns)

#transform the testing dat
X_test = pd.DataFrame(data=scaler2.transform(X_test), columns=df_poly5.columns)


In [92]:
# instantiate a linear regression object
lm_5 = LinearRegression()

#fit the linear regression to the data
lm_5 = lm_5.fit(X_train, y_train)

y_train_pred = lm_5.predict(X_train)

train_rmse = np.sqrt(metrics.mean_squared_error(y_train, y_train_pred))


print('Training Root Mean Squared Error:' , train_rmse)

Training Root Mean Squared Error: 2.382274783466104e-12


In [103]:
selector = SelectKBest(f_regression, k=30)

selector.fit(X_train, y_train)

  corr /= X_norms
  cond2 = cond0 & (x <= _a)


SelectKBest(k=30, score_func=<function f_regression at 0x7fbca0b4e9d0>)

In [104]:
selected_columns = X_train.columns[selector.get_support()]
removed_columns = X_train.columns[~selector.get_support()]

In [105]:
ols_selected_cols = list(selected_columns)

In [106]:
#instantiate a linear regression object
lm_kbest = LinearRegression()

#fit the linear regression to the data
lm_kbest = lm_kbest.fit(X_train[selected_columns], y_train)

y_train_kbest = lm_kbest.predict(X_train[selected_columns])


trainK_rmse = np.sqrt(metrics.mean_squared_error(y_train, y_train_kbest))


print('Training Root Mean Squared Error:' , trainK_rmse)

y_kbest = lm_kbest.predict(X_test[selected_columns])

testK_rmse = np.sqrt(metrics.mean_squared_error(y_test, y_kbest))

print('Testing Root Mean Squared Error:' , testK_rmse)




Training Root Mean Squared Error: 2.812494190922182
Testing Root Mean Squared Error: 2.6419146587113875


In [107]:
'+'.join(ols_selected_cols)

'failures+studytimehigher+failuresnursery+failuresfamrel+failuresfreetime+failuresgoout+failuresWalc+failuresfamsize_BIG_FAMILY+studytimehigher2+failuresnursery2+failuresnurserygoout+failuresfreetimegoout+failuresfreetimeWalc+failuresfamsize_BIG_FAMILY2+studytimehigher3+studytimehigherinternetaddress_type_Urban+failuresnursery3+failuresnursery2goout+failuresfreetime2goout+failuresfreetime2Walc+failuresfreetimegooutWalc+failuresfamsize_BIG_FAMILY3+traveltimefailuresfreetime2Walc+studytimehigher4+studytimehigher2internetaddress_type_Urban+studytimehigherinternet2address_type_Urban+studytimehigherinternetaddress_type_Urban2+failuresnursery4+failuresnursery3goout+failuresfamsize_BIG_FAMILY4'

In [109]:
ols_selected_cols
X_train['g3'] = p.G3
crazy_line = ols(data = X_train, formula = 'g3~failures+studytimehigher+failuresnursery+failuresfamrel+failuresfreetime+failuresgoout+failuresWalc+failuresfamsize_BIG_FAMILY+studytimehigher2+failuresnursery2+failuresnurserygoout+failuresfreetimegoout+failuresfreetimeWalc+failuresfamsize_BIG_FAMILY2+studytimehigher3+studytimehigherinternetaddress_type_Urban+failuresnursery3+failuresnursery2goout+failuresfreetime2goout+failuresfreetime2Walc+failuresfreetimegooutWalc+failuresfamsize_BIG_FAMILY3+traveltimefailuresfreetime2Walc+studytimehigher4+studytimehigher2internetaddress_type_Urban+studytimehigherinternet2address_type_Urban+studytimehigherinternetaddress_type_Urban2+failuresnursery4+failuresnursery3goout+failuresfamsize_BIG_FAMILY4').fit()
crazy_line.summary()

0,1,2,3
Dep. Variable:,g3,R-squared:,0.022
Model:,OLS,Adj. R-squared:,-0.009
Method:,Least Squares,F-statistic:,0.7052
Date:,"Sun, 13 Sep 2020",Prob (F-statistic):,0.79
Time:,03:01:02,Log-Likelihood:,-1250.2
No. Observations:,519,AIC:,2534.0
Df Residuals:,502,BIC:,2607.0
Df Model:,16,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,12.2987,0.120,102.400,0.000,12.063,12.535
failures,1.3809,1.234,1.119,0.264,-1.044,3.805
studytimehigher,0.0161,0.037,0.438,0.662,-0.056,0.088
failuresnursery,-0.1504,0.177,-0.847,0.397,-0.499,0.198
failuresfamrel,-0.4219,0.559,-0.755,0.451,-1.520,0.676
failuresfreetime,-1.4578,1.078,-1.353,0.177,-3.575,0.660
failuresgoout,-3.0913,3.081,-1.003,0.316,-9.145,2.962
failuresWalc,2.3956,3.088,0.776,0.438,-3.671,8.462
failuresfamsize_BIG_FAMILY,0.0724,0.074,0.985,0.325,-0.072,0.217

0,1,2,3
Omnibus:,20.675,Durbin-Watson:,1.59
Prob(Omnibus):,0.0,Jarque-Bera (JB):,33.58
Skew:,-0.294,Prob(JB):,5.11e-08
Kurtosis:,4.099,Cond. No.,1.99e+16


In [98]:
crazy_line.summary()

NameError: name 'crazy_line' is not defined

ERROR! Session/line number was not unique in database. History logging moved to new session 175
