In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge,Lasso,RidgeCV,LassoCV,ElasticNetCV,ElasticNet,LinearRegression
from sklearn.model_selection import train_test_split
import statsmodels.api as sm
from pandas_profiling import ProfileReport
from statsmodels.stats.outliers_influence import variance_inflation_factor
import pickle

In [None]:
# Load the dataset from a CSV file located in the current working directory.
df = pd.read_csv('ai4i2020.csv')
# Preview the first rows to confirm the file was parsed as expected.
df.head()

In [None]:
# Build a profiling report for the raw frame.
pf = ProfileReport(df)
# Render the report through its widget interface inside the notebook.
pf.to_widgets()
# Also write the report to an HTML file.
# NOTE(review): presumably the "templates" directory must already exist — confirm.
pf.to_file("templates/report.html")

In [None]:
# Keep only the columns from position 3 onward, i.e. discard the first three columns.
# NOTE: `df` is rebound to its own slice, so this cell is not idempotent — running it
# again removes three MORE columns. Re-run the load cell before repeating this one.
df=df.iloc[:,3:]
df.head()

In [None]:
# Summary statistics of the trimmed frame.
df.describe()

In [None]:
# Feature matrix: every remaining column except the regression target.
x = df.drop(columns=['Air temperature [K]'])
x.head()

In [None]:
# Target vector: the air temperature column.
y=df['Air temperature [K]']
y.head()

In [None]:
# Scaler used to standardize the features for the VIF computation below.
scaler = StandardScaler()

In [None]:
# Fit the scaler on the full feature matrix and keep the scaled array.
arr = scaler.fit_transform(x)
arr

In [None]:
# DataFrame view of the scaled array; no column names are passed, so labels are positional.
df1 = pd.DataFrame(arr)
df1.head()

In [None]:
# Variance inflation factor of every scaled feature, paired with its original column name.
vif_df = pd.DataFrame({
    'vif': [variance_inflation_factor(arr, col_idx) for col_idx in range(arr.shape[1])],
    'feature': x.columns,
})
vif_df

In [None]:
# The 'Machine failure' column has a VIF greater than 10, so we drop it.

In [None]:
# Derive the reduced feature matrix directly from `df` rather than dropping a
# column from `x` itself. Rebinding `x` to `x.drop(...)` raises KeyError as soon
# as the cell is executed a second time (the column is already gone); building
# it from `df` yields the same columns and keeps the cell safe to re-run.
x = df.drop(columns=['Air temperature [K]', 'Machine failure'])
x.head()

In [None]:
# Refit the same scaler on the reduced feature matrix (this replaces its earlier fit).
arr1 = scaler.fit_transform(x)
arr1

In [None]:
# DataFrame view of the rescaled array; rebinds the `df1` name used earlier.
df1 = pd.DataFrame(arr1)
df1.head()

In [None]:
# VIF table recomputed on the rescaled, reduced feature set.
vif_df1 = pd.DataFrame({
    'vif': [variance_inflation_factor(arr1, col_idx) for col_idx in range(arr1.shape[1])],
    'feature': x.columns,
})
vif_df1

In [None]:
# Hold out 25% of the rows for testing; the fixed random_state keeps the split repeatable.
# Note: the split uses the unscaled `x`, not the scaled `arr1` computed for the VIF check.
X_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.25,random_state = 20)

In [None]:
# Inspect the training features.
X_train

In [None]:
# Ordinary least-squares baseline model.
lr = LinearRegression()

In [None]:
# Fit on the training split.
lr.fit(X_train,y_train)

In [None]:
# Score (R^2) on the held-out test split.
lr.score(x_test,y_test)

In [None]:
def adj_r2(x,y,model=None):
    """Return the adjusted R^2 of a fitted regressor on the given data.

    Parameters
    ----------
    x : array-like with a ``.shape`` of (n_samples, n_features)
        Feature matrix to score on.
    y : array-like
        True target values matching the rows of ``x``.
    model : object with a ``score(x, y)`` method, optional
        Fitted estimator to evaluate. When omitted, the notebook-level ``lr``
        is used, so existing ``adj_r2(x, y)`` calls behave as before.

    Returns
    -------
    float
        ``1 - (1 - r2) * (n - 1) / (n - p - 1)``, where ``r2`` is
        ``model.score(x, y)``, ``n`` the number of rows and ``p`` the number
        of columns of ``x``.

    Raises
    ------
    ValueError
        If ``n - p - 1 <= 0``; the correction term is undefined (division by
        zero) or meaningless (negative degrees of freedom) in that case.
    """
    # Fall back to the global linear model only when no estimator is supplied.
    estimator = lr if model is None else model
    n, p = x.shape[0], x.shape[1]
    dof = n - p - 1
    if dof <= 0:
        raise ValueError(
            f"adjusted R^2 is undefined for n={n} samples and p={p} features "
            "(requires n > p + 1)"
        )
    r2 = estimator.score(x, y)
    return 1 - (1 - r2) * (n - 1) / dof

In [None]:
# Adjusted R^2 of the linear model on the test split.
adj_r2(x_test,y_test)

In [None]:
# Fitted coefficients of the linear model.
lr.coef_

In [None]:
# Fitted intercept of the linear model.
lr.intercept_

In [None]:
# Select the Lasso penalty by 10-fold cross-validation over the estimator's
# default alpha grid (the explicit `alphas=None` was just the default).
# `normalize=True` is deliberately not passed:
#   * the argument was removed in scikit-learn 1.2 and raises TypeError there;
#   * the chosen `alpha_` is reused below by a plain `Lasso` fitted without
#     normalization, so alpha must be selected on the same feature scale.
lassocv = LassoCV(cv=10, max_iter=20000000)
lassocv.fit(X_train,y_train)

In [None]:
# Penalty strength selected by cross-validation.
lassocv.alpha_

In [None]:
# Refit a plain Lasso on the training split using the selected alpha.
lass = Lasso(alpha=lassocv.alpha_)
lass.fit(X_train,y_train)

In [None]:
# Score (R^2) of the Lasso model on the test split.
lass.score(x_test,y_test)

In [None]:
# Candidate ridge penalties: 50 draws from the uniform distribution on [0, 10).
# A seeded generator keeps the grid — and therefore the alpha that RidgeCV
# selects — identical across kernel restarts.
alpha = np.random.default_rng(20).uniform(0, 10, 50)

In [None]:
# Select the ridge penalty from the candidate grid by 10-fold cross-validation.
# `normalize=True` is deliberately not passed:
#   * the argument was removed in scikit-learn 1.2 and raises TypeError there;
#   * the chosen `alpha_` is reused below by a plain `Ridge` fitted without
#     normalization, so alpha must be selected on the same feature scale.
ridgecv = RidgeCV(alphas=alpha, cv=10)
ridgecv.fit(X_train,y_train)

In [None]:
# Penalty strength selected by cross-validation.
ridgecv.alpha_

In [None]:
# Refit a plain Ridge on the training split using the selected alpha.
ridge_lr = Ridge(alpha=ridgecv.alpha_)
ridge_lr.fit(X_train,y_train)

In [None]:
# Score (R^2) of the Ridge model on the test split.
ridge_lr.score(x_test,y_test)

In [None]:
# 10-fold cross-validated ElasticNet; the alpha grid is left at the estimator's default.
elastic = ElasticNetCV(cv=10)
elastic.fit(X_train, y_train)

In [None]:
# Penalty strength selected by cross-validation.
elastic.alpha_

In [None]:
# L1/L2 mixing ratio held by the cross-validated estimator.
elastic.l1_ratio_

In [None]:
# Plain ElasticNet configured with the cross-validated hyperparameters.
elastic_lr = ElasticNet(alpha=elastic.alpha_,l1_ratio=elastic.l1_ratio_)

In [None]:
# Fit on the training split.
elastic_lr.fit(X_train,y_train)

In [None]:
# Score (R^2) of the ElasticNet model on the test split.
elastic_lr.score(x_test,y_test)

In [None]:
# Test-split score of each fitted model, keyed by its display label.
result = {
    label: estimator.score(x_test, y_test)
    for label, estimator in (
        ("Linear Regression Score: ", lr),
        ("Lasso Regression Score: ", lass),
        ("Ridge Regression Score: ", ridge_lr),
        ("ElasticNet Regression Score: ", elastic_lr),
    )
}

In [None]:
# Print the score summary as plain text.
print(result)

In [None]:
# Predict the target for one hand-written sample with each model.
# NOTE(review): the nine values are presumably listed in the column order of
# `X_train` — confirm against `X_train.columns` before trusting the outputs.
lr.predict([[308.7,1408,46.3,3,0,0,0,0,0]])

In [None]:
# Same sample, ElasticNet model.
elastic_lr.predict([[308.7,1408,46.3,3,0,0,0,0,0]])

In [None]:
# Same sample, Lasso model.
lass.predict([[308.7,1408,46.3,3,0,0,0,0,0]])

In [None]:
# Persist the fitted Lasso model. The context manager closes (and flushes) the
# file deterministically instead of leaving the open handle to the garbage
# collector, so the pickle is complete on disk before it is read back.
with open('challenge.pickle','wb') as model_file:
    pickle.dump(lass, model_file)

In [None]:
# Reload the pickled model; the context manager closes the file handle promptly.
# Only unpickle files from a trusted source: pickle.load can execute arbitrary
# code embedded in the file.
with open('challenge.pickle','rb') as model_file:
    model = pickle.load(model_file)

In [None]:
# Check that the reloaded model predicts for the same hand-written sample used above.
model.predict([[308.7,1408,46.3,3,0,0,0,0,0]])

In [None]:
# Write the output of `pip freeze` to requirements.txt via a shell command.
# NOTE(review): `!pip` runs whichever pip is first on PATH, which may not be the
# kernel's environment — confirm, or consider the `%pip` magic instead.
!pip freeze > requirements.txt