# Feature Engineering And Model Training

## 1. Importing Cleaned Data

In [72]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [73]:
df = pd.read_csv("clean_data.csv")

In [74]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 106159 entries, 0 to 106158
Data columns (total 59 columns):
 #   Column                              Non-Null Count   Dtype  
---  ------                              --------------   -----  
 0   EstimatedEffectiveYield             106159 non-null  float64
 1   EstimatedLoss                       106159 non-null  float64
 2   EstimatedReturn                     106159 non-null  float64
 3   ProsperRating (numeric)             106159 non-null  float64
 4   ProsperScore                        106159 non-null  float64
 5   Term                                106159 non-null  int64  
 6   LoanStatus                          106159 non-null  object 
 7   BorrowerRate                        106159 non-null  float64
 8   LenderYield                         106159 non-null  float64
 9   ProsperRating (Alpha)               106159 non-null  object 
 10  ListingCategory (numeric)           106159 non-null  int64  
 11  IsBorrowerHomeowner       

In [75]:
df.head()

Unnamed: 0,EstimatedEffectiveYield,EstimatedLoss,EstimatedReturn,ProsperRating (numeric),ProsperScore,Term,LoanStatus,BorrowerRate,LenderYield,ProsperRating (Alpha),...,OpenCreditLines,AmountDelinquent,PublicRecordsLast12Months,RevolvingCreditBalance,BankcardUtilization,AvailableBankcardCredit,TotalTrades,TradesNeverDelinquent (percentage),TradesOpenedLast6Months,DebtToIncomeRatio
0,0.168661,0.080306,0.096068,4.072243,5.950067,36,Completed,0.158,0.138,Missing,...,4.0,472.0,0.0,0.0,0.0,1500.0,11.0,0.81,0.0,0.17
1,0.0796,0.0249,0.0547,6.0,7.0,36,Current,0.092,0.082,A,...,14.0,0.0,0.0,3989.0,0.21,10266.0,29.0,1.0,2.0,0.18
2,0.168661,0.080306,0.096068,4.072243,5.950067,36,Completed,0.275,0.24,Missing,...,9.260164,984.507059,0.015094,17598.706751,0.561309,11210.225447,23.230034,0.885897,0.802327,0.06
3,0.0849,0.0249,0.06,6.0,9.0,36,Current,0.0974,0.0874,A,...,5.0,10056.0,0.0,1444.0,0.04,30754.0,26.0,0.76,0.0,0.15
4,0.18316,0.0925,0.09066,3.0,4.0,36,Current,0.2085,0.1985,D,...,19.0,0.0,0.0,6193.0,0.81,695.0,39.0,0.95,2.0,0.26


## 2. Creating EMI variable

In [76]:
P = df['LP_CustomerPrincipalPayments']
n = df['Term']
r = df['BorrowerRate']

In [77]:
def cal_EMI(P,r,n):
    P = P.values
    r = r.values
    n = n.values
    result1 = np.empty(0)
    result2 = np.empty(0)
    result = np.empty(0)
    for i in range(P.shape[0]):
        result1 = np.append(result1, P[i]*r[i]*np.power((1+r[i]),n[i]))
        result2 = np.append(result2, np.power((1+r[i]),n[i])-1)
        result = np.append(result,(result1[i]/result2[i]))
    df['EMI'] = result
    return result

In [78]:
cal_EMI(P,r,n).size

  result = np.append(result,(result1[i]/result2[i]))


106159

In [83]:
A = df['LoanOriginalAmount']
N = df['Term']
R = df['BorrowerRate']
I = df['StatedMonthlyIncome']
L = df['LoanStatus']

In [80]:
df['LoanStatus']=(df['LoanStatus']=='Completed').astype(int)
df.fillna(0,inplace=True)

In [84]:
def cal_risk(A,R,N,I,L):
    A = A.values
    R = R.values
    N = N.values
    I = I.values
    L = L.values
    Total_payment_due = np.empty(0)
    Max_allowed_amount = np.empty(0)
    Risk = np.empty(0)
    ELA = np.empty(0)
    for i in range(A.shape[0]):
        Total_payment_due = np.append(Total_payment_due,(A[i]+(A[i]*R[i])*N[i]))
        Max_allowed_amount = np.append(Max_allowed_amount,(I[i]*12*30)/100)
        if(Total_payment_due[i]<=Max_allowed_amount[i]):
            ELA = np.append(ELA,A[i])
        else:
            ELA = np.append(ELA,Max_allowed_amount[i])
        if(Max_allowed_amount[i]<Total_payment_due[i] and L[i]==1):
            Risk = np.append(Risk,2)
        elif(Max_allowed_amount[i]<Total_payment_due[i] and L[i]==0):
            Risk = np.append(Risk,4)
        elif(Max_allowed_amount[i]>Total_payment_due[i] and L[i]==1):
            Risk = np.append(Risk,1)
        elif(Max_allowed_amount[i]>Total_payment_due[i] and L[i]==0):
            Risk = np.append(Risk,3)
        elif(Max_allowed_amount[i]==Total_payment_due[i] and L[i]==1):
            Risk = np.append(Risk,2)
        elif(Max_allowed_amount[i]==Total_payment_due[i] and L[i]==0):
            Risk = np.append(Risk,3)
    df['Riskk'] = Risk
    return Risk

In [85]:
cal_risk(A,R,N,I,L).size

106159

## 3. Creating ELA variable

In [86]:
A = df['LoanOriginalAmount']
N = df['Term']
R = df['BorrowerRate']
I = df['StatedMonthlyIncome']
L = df['LoanStatus']

In [87]:
def cal_ELA(A,N,R,I):
    A = A.values
    R = R.values
    N = N.values
    I = I.values
    Total_payment_due = np.empty(0)
    Max_allowed_amount = np.empty(0)
    ELA = np.empty(0)
    for i in range(A.shape[0]):
        Total_payment_due = np.append(Total_payment_due,(A[i]+(A[i]*R[i]))*N[i])
        Max_allowed_amount = np.append(Max_allowed_amount,(I[i]*12*30)/100)
        if(Total_payment_due[i]<=Max_allowed_amount[i]):
            ELA = np.append(ELA,A[i])
        else:
            ELA = np.append(ELA,Max_allowed_amount[i])
    df['ELA'] = ELA
    return ELA

In [88]:
cal_ELA(A,N,R,I).size

106159

## 4. Creating PROI variable

In [90]:
temp = pd.read_csv('C:/Users/atharv/Downloads/EMail/prosperLoanData.csv')
temp['CreditGrade'].fillna("UN",inplace = True)
credit = temp['CreditGrade']
df['CreditGrade'] = credit

In [91]:
def cal_PROI(df,credit):
    df['InterestAmount'] = (df['LoanOriginalAmount']*df['BorrowerRate'])
    df['TotalAmount'] = (df['InterestAmount']+df['LoanOriginalAmount'])
    df['ROI'] = (df['InterestAmount']/df['TotalAmount'])
    
    df['PROI'] = df['ROI'].median()
    
    for i in range(df.shape[0]):
        if df['CreditGrade'][i] == 'E':
            df['PROI'].iloc[i] = df['PROI'].iloc[i] + 0.05
        elif df['CreditGrade'][i] == 'HR':
            df['PROI'].iloc[i] = df['PROI'].iloc[i] + 0.05
        elif df['CreditGrade'][i] == 'A':
            df['PROI'].iloc[i] = df['PROI'].iloc[i] - 0.05
        elif df['CreditGrade'][i] == 'AA':
            df['PROI'].iloc[i] = df['PROI'].iloc[i] - 0.1
        
        if(df['TradesNeverDelinquent (percentage)'].iloc[i])<0.7:
            df['PROI'].iloc[i] = df['PROI'].iloc[i] + 0.05
        
        if(df['AvailableBankcardCredit'].iloc[i])<=800.0:
            df['PROI'].iloc[i] = df['PROI'].iloc[i] + 0.05
        elif (df['AvailableBankcardCredit'].iloc[i])>=16500:
            df['PROI'].iloc[i] = df['PROI'].iloc[i] - 0.05
     
        if(df['TotalInquiries'].iloc[i])>28:
            df['PROI'].iloc[i] = df['PROI'].iloc[i] + 0.1
        elif (df['TotalInquiries'].iloc[i])>12:
            df['PROI'].iloc[i] = df['PROI'].iloc[i] + 0.05
        
    return df['PROI'].values
        

In [92]:
cal_PROI(df,credit).size

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


106159

In [93]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 106159 entries, 0 to 106158
Data columns (total 67 columns):
 #   Column                              Non-Null Count   Dtype  
---  ------                              --------------   -----  
 0   EstimatedEffectiveYield             106159 non-null  float64
 1   EstimatedLoss                       106159 non-null  float64
 2   EstimatedReturn                     106159 non-null  float64
 3   ProsperRating (numeric)             106159 non-null  float64
 4   ProsperScore                        106159 non-null  float64
 5   Term                                106159 non-null  int64  
 6   LoanStatus                          106159 non-null  int32  
 7   BorrowerRate                        106159 non-null  float64
 8   LenderYield                         106159 non-null  float64
 9   ProsperRating (Alpha)               106159 non-null  object 
 10  ListingCategory (numeric)           106159 non-null  int64  
 11  IsBorrowerHomeowner       

In [94]:
df.drop(['TotalAmount','InterestAmount','ROI'],axis=1,inplace=True)

In [97]:
df.drop(['LP_NonPrincipalRecoverypayments','PercentFunded','InvestmentFromFriendsCount','InvestmentFromFriendsAmount','TradesOpenedLast6Months','InquiriesLast6Months'],axis =1,inplace=True)

In [98]:
df.drop(['EstimatedEffectiveYield','EstimatedLoss','EstimatedReturn','LenderYield','LoanStatus'],axis=1,inplace=True)

In [99]:
df['IsBorrowerHomeowner'] = (df['IsBorrowerHomeowner']==True).astype(int)
df['CurrentlyInGroup'] = (df['CurrentlyInGroup']==True).astype(int)
df['IncomeVerifiable'] = (df['IncomeVerifiable']==True).astype(int)

In [106]:
df.drop(['Recommendations','PublicRecordsLast12Months','LoanMonthsSinceOrigination','LoanCurrentDaysDelinquent','TotalTrades'],axis=1,inplace=True)

In [109]:
df.drop(['MonthlyLoanPayment','LP_CustomerPayments','LP_InterestandFees','LP_ServiceFees','LP_GrossPrincipalLoss','LP_NetPrincipalLoss','LP_CollectionFees'],axis =1, inplace=True)

In [113]:
df.to_csv('tf_data.csv',index=False)

In [114]:
y = df[['EMI','ELA','PROI','Riskk']]
df.drop(['EMI','ELA','PROI','Riskk'],axis=1,inplace=True)

## 5. Categorical/Label Encoding

In [35]:
categorical_columns = df.select_dtypes(include=["object"]).columns.values
category_columns=df[categorical_columns]

In [36]:
#one hot encoding
from sklearn.preprocessing import OneHotEncoder
ohe = OneHotEncoder(drop='first')
enc=ohe.fit_transform(category_columns[['CreditGrade','BorrowerState','Occupation','EmploymentStatus']]).toarray()

In [37]:
Nominal_encoding=pd.DataFrame(enc)

In [38]:
#ordinal encoding
from sklearn.preprocessing import OrdinalEncoder
oe=OrdinalEncoder(categories=[['Missing','HR','E','D','C','B','A','AA'],['Not displayed','$0','Not employed','$1-24,999','$75,000-99,999','$100,000+','$50,000-74,999','$25,000-49,999']])
oe.fit(category_columns[['ProsperRating (Alpha)','IncomeRange']])

In [39]:
Ordinal_encoding=pd.DataFrame(oe.transform(category_columns[['ProsperRating (Alpha)','IncomeRange']]))

In [40]:
total_categorical_frame = pd.DataFrame(np.hstack((Nominal_encoding.values,Ordinal_encoding.values)))

In [41]:
numerical_columns = df.select_dtypes(exclude=["object"]).columns.values
total_numerical_frame=df[numerical_columns]

In [43]:
data_ready_for_scaling=pd.DataFrame(np.hstack((total_numerical_frame.values,total_categorical_frame.values)))

In [44]:
x = data_ready_for_scaling

## 6. Predicting Risk Status

In [46]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x, y[['Riskk']], shuffle = True,test_size=0.33,random_state=100)

In [47]:
#Applying Standardization
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()
scaler.fit(pd.DataFrame(X_train))
X_train_scaled = pd.DataFrame(scaler.transform(pd.DataFrame(X_train)))
X_test_scaled = pd.DataFrame(scaler.transform(pd.DataFrame(X_test)))

In [48]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix

dtree_model = DecisionTreeClassifier(max_depth = 4).fit(X_train_scaled, y_train)

dtree_predictions = dtree_model.predict(X_test_scaled)
acc = dtree_model.score(X_test_scaled,y_test)

In [49]:
acc

0.8762880712471098

## 7. Predicting ELA (Eligible Loan Amount)

In [51]:
#Splitting the training and testing data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x, y['ELA'], shuffle = True,test_size=0.33,random_state=100)

In [52]:
#Applying Standardization
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()
scaler.fit(pd.DataFrame(X_train))
X_train_scaled = pd.DataFrame(scaler.transform(pd.DataFrame(X_train)))
X_test_scaled = pd.DataFrame(scaler.transform(pd.DataFrame(X_test)))

In [53]:
from sklearn.linear_model import LinearRegression
# Import mean squared error
from sklearn.metrics import mean_squared_error
# Initiate the model
from time import time
start = time()
# Fit the model to our dataset
model = LinearRegression()
reg=model.fit(X_train_scaled, y_train)
test_pred=reg.predict(X_test_scaled)
test_mse = mean_squared_error(y_test, test_pred)
accuracy=model.score(X_test_scaled, y_test)
end = time()
duration=end-start
# Show the results
print(f"The duration of training is: {duration}")
print(f"R_squared for the validation data is:{accuracy}")
print(f"The mean squared error for the testing model is:{test_mse} ")

The duration of training is: 1.7315545082092285
R_squared for the validation data is:0.5672023209773261
The mean squared error for the testing model is:96126646.24855421 


In [54]:
# Import Ridge Regression algorithm
from sklearn.linear_model import Ridge
# Initiate the model
start = time()
ridg_model = Ridge(alpha=1.0)
# Fit the model to our dataset
ridg_model.fit(X_train_scaled, y_train)
test_preds = ridg_model.predict(X_test_scaled)
test_mse_ridg = mean_squared_error(y_test, test_preds)
accuracy=ridg_model.score(X_test_scaled, y_test)
end = time()
duration=end-start
# Show the results
print(f"The duration of training is: {duration}")
print(f"R_squared for the validation data is:{accuracy}")
print(f"The mean squared error for the testing model is:{test_mse_ridg} ")

The duration of training is: 0.2131352424621582
R_squared for the validation data is:0.5672144800332808
The mean squared error for the testing model is:96123945.65812333 


In [55]:
# Import Lasso Regression algorithm
from sklearn.linear_model import Lasso
# Initiate the model
start = time()
las_model = Lasso(alpha = 1, max_iter=2000, random_state=10)
# Fit the model to our dataset
las_model.fit(X_train_scaled, y_train)
test_preds = las_model.predict(X_test_scaled)
test_mse_las = mean_squared_error(y_test, test_preds)
accuracy=las_model.score(X_test_scaled, y_test)
end = time()
duration=end-start
# Show the results
print(f"The duration of training is: {duration}")
print(f"R_squared for the validation data is:{accuracy}")
print(f"The mean squared error for the testing model is:{test_mse_las} ")


The duration of training is: 95.34652876853943
R_squared for the validation data is:0.5672461108489358
The mean squared error for the testing model is:96116920.28720663 


In [56]:
# Import  Random Forest Regressor algorithm
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error,r2_score
# Initiate the model
start = time()
ran_model = RandomForestRegressor(n_estimators = 100)
# Fit the model to our dataset
ran_model.fit(X_train_scaled, y_train)
test_predsr = ran_model.predict(X_test_scaled)
accuracy=r2_score(y_test, test_predsr)
test_mse_ran = mean_squared_error(y_test, test_predsr)
end = time()
duration=end-start
# Show the results
print(f"The duration of training is: {duration}")
print(f"R_squared for the validation data is:{accuracy}")
print(f"The mean squared error for the model is:{test_mse_ran} ")

  ran_model.fit(X_train_scaled, y_train)


The duration of training is: 262.61998867988586
R_squared for the validation data is:0.9376158377037085
The mean squared error for the model is:13855851.339382632 


## 8. Predicitng EMI (Equated Monthly Income)

In [58]:
#Splitting the training and testing data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x, y['EMI'], shuffle = True,test_size=0.33,random_state=100)

In [59]:
#Applying Standardization
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()
scaler.fit(pd.DataFrame(X_train))
X_train_scaled = pd.DataFrame(scaler.transform(pd.DataFrame(X_train)))
X_test_scaled = pd.DataFrame(scaler.transform(pd.DataFrame(X_test)))

In [60]:
from sklearn.linear_model import LinearRegression
# Import mean squared error
from sklearn.metrics import mean_squared_error
# Initiate the model
from time import time
start = time()
# Fit the model to our dataset
model = LinearRegression()
reg=model.fit(X_train_scaled, y_train)
test_pred=reg.predict(X_test_scaled)
test_mse = mean_squared_error(y_test, test_pred)
accuracy=model.score(X_test_scaled, y_test)
end = time()
duration=end-start
# Show the results
print(f"The duration of training is: {duration}")
print(f"R_squared for the validation data is:{accuracy}")
print(f"The mean squared error for the testing model is:{test_mse} ")

The duration of training is: 1.8918824195861816
R_squared for the validation data is:0.9161885726733131
The mean squared error for the testing model is:43372.227471152575 


In [61]:
# Import Ridge Regression algorithm
from sklearn.linear_model import Ridge
# Initiate the model
start = time()
ridg_model = Ridge(alpha=1.0)
# Fit the model to our dataset
ridg_model.fit(X_train_scaled, y_train)
test_preds = ridg_model.predict(X_test_scaled)
test_mse_ridg = mean_squared_error(y_test, test_preds)
accuracy=ridg_model.score(X_test_scaled, y_test)
end = time()
duration=end-start
# Show the results
print(f"The duration of training is: {duration}")
print(f"R_squared for the validation data is:{accuracy}")
print(f"The mean squared error for the testing model is:{test_mse_ridg} ")

The duration of training is: 0.19895315170288086
R_squared for the validation data is:0.9161786507718461
The mean squared error for the testing model is:43377.3620331222 


In [62]:
# Import Lasso Regression algorithm
from sklearn.linear_model import Lasso
# Initiate the model
start = time()
las_model = Lasso(alpha = 1, max_iter=2000, random_state=10)
# Fit the model to our dataset
las_model.fit(X_train_scaled, y_train)
test_preds = las_model.predict(X_test_scaled)
test_mse_las = mean_squared_error(y_test, test_preds)
accuracy=las_model.score(X_test_scaled, y_test)
end = time()
duration=end-start
# Show the results
print(f"The duration of training is: {duration}")
print(f"R_squared for the validation data is:{accuracy}")
print(f"The mean squared error for the testing model is:{test_mse_las} ")


The duration of training is: 6.322042226791382
R_squared for the validation data is:0.9157565416919402
The mean squared error for the testing model is:43595.8025443422 


In [63]:
# Import  Random Forest Regressor algorithm
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error,r2_score
# Initiate the model
start = time()
ran_model = RandomForestRegressor(n_estimators = 100)
# Fit the model to our dataset
ran_model.fit(X_train_scaled, y_train)
test_predsr = ran_model.predict(X_test_scaled)
accuracy=r2_score(y_test, test_predsr)
test_mse_ran = mean_squared_error(y_test, test_predsr)
end = time()
duration=end-start
# Show the results
print(f"The duration of training is: {duration}")
print(f"R_squared for the validation data is:{accuracy}")
print(f"The mean squared error for the model is:{test_mse_ran} ")

The duration of training is: 324.9240336418152
R_squared for the validation data is:0.9987262445567846
The mean squared error for the model is:659.1656124696976 


## 9. Predicting  PROI (Preferred Return on Investment)

In [65]:
#Splitting the training and testing data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x, y['PROI'], shuffle = True,test_size=0.33,random_state=100)

In [66]:
#Applying Standardization
from sklearn.preprocessing import StandardScaler
scaler=StandardScaler()
scaler.fit(pd.DataFrame(X_train))
X_train_scaled = pd.DataFrame(scaler.transform(pd.DataFrame(X_train)))
X_test_scaled = pd.DataFrame(scaler.transform(pd.DataFrame(X_test)))

In [67]:
from sklearn.linear_model import LinearRegression
# Import mean squared error
from sklearn.metrics import mean_squared_error
# Initiate the model
from time import time
start = time()
# Fit the model to our dataset
model = LinearRegression()
reg=model.fit(X_train_scaled, y_train)
test_pred=reg.predict(X_test_scaled)
test_mse = mean_squared_error(y_test, test_pred)
accuracy=model.score(X_test_scaled, y_test)
end = time()
duration=end-start
# Show the results
print(f"The duration of training is: {duration}")
print(f"R_squared for the validation data is:{accuracy}")
print(f"The mean squared error for the testing model is:{test_mse} ")

The duration of training is: 1.7934274673461914
R_squared for the validation data is:0.7363311914640781
The mean squared error for the testing model is:0.0006345510580954896 


In [68]:
# Import Ridge Regression algorithm
from sklearn.linear_model import Ridge
# Initiate the model
start = time()
ridg_model = Ridge(alpha=1.0)
# Fit the model to our dataset
ridg_model.fit(X_train_scaled, y_train)
test_preds = ridg_model.predict(X_test_scaled)
test_mse_ridg = mean_squared_error(y_test, test_preds)
accuracy=ridg_model.score(X_test_scaled, y_test)
end = time()
duration=end-start
# Show the results
print(f"The duration of training is: {duration}")
print(f"R_squared for the validation data is:{accuracy}")
print(f"The mean squared error for the testing model is:{test_mse_ridg} ")

The duration of training is: 0.2751901149749756
R_squared for the validation data is:0.7363270944797727
The mean squared error for the testing model is:0.0006345609179865416 


In [69]:
# Import Lasso Regression algorithm
from sklearn.linear_model import Lasso
# Initiate the model
start = time()
las_model = Lasso(alpha = 1, max_iter=2000, random_state=10)
# Fit the model to our dataset
las_model.fit(X_train_scaled, y_train)
test_preds = las_model.predict(X_test_scaled)
test_mse_las = mean_squared_error(y_test, test_preds)
accuracy=las_model.score(X_test_scaled, y_test)
end = time()
duration=end-start
# Show the results
print(f"The duration of training is: {duration}")
print(f"R_squared for the validation data is:{accuracy}")
print(f"The mean squared error for the testing model is:{test_mse_las} ")


The duration of training is: 0.45366668701171875
R_squared for the validation data is:-6.654360350899502e-05
The mean squared error for the testing model is:0.0024067817764764146 


In [70]:
# Import  Random Forest Regressor algorithm
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error,r2_score
# Initiate the model
start = time()
ran_model = RandomForestRegressor(n_estimators = 100)
# Fit the model to our dataset
ran_model.fit(X_train_scaled, y_train)
test_predsr = ran_model.predict(X_test_scaled)
accuracy=r2_score(y_test, test_predsr)
test_mse_ran = mean_squared_error(y_test, test_predsr)
end = time()
duration=end-start
# Show the results
print(f"The duration of training is: {duration}")
print(f"R_squared for the validation data is:{accuracy}")
print(f"The mean squared error for the model is:{test_mse_ran} ")

The duration of training is: 224.2358226776123
R_squared for the validation data is:0.9986097335358249
The mean squared error for the model is:3.3458453458168026e-06 
