In [1]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [2]:
df=pd.read_csv('Credit.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,ID,Income,Limit,Rating,Cards,Age,Education,Gender,Student,Married,Ethnicity,Balance
0,1,1,14.891,3606,283,2,34,11,Male,No,Yes,Caucasian,333
1,2,2,106.025,6645,483,3,82,15,Female,Yes,Yes,Asian,903
2,3,3,104.593,7075,514,4,71,11,Male,No,No,Asian,580
3,4,4,148.924,9504,681,3,36,11,Female,No,No,Asian,964
4,5,5,55.882,4897,357,2,68,16,Male,No,Yes,Caucasian,331


In [3]:
df=df.drop(df.columns[0], axis=1)

In [4]:
df.head()

Unnamed: 0,ID,Income,Limit,Rating,Cards,Age,Education,Gender,Student,Married,Ethnicity,Balance
0,1,14.891,3606,283,2,34,11,Male,No,Yes,Caucasian,333
1,2,106.025,6645,483,3,82,15,Female,Yes,Yes,Asian,903
2,3,104.593,7075,514,4,71,11,Male,No,No,Asian,580
3,4,148.924,9504,681,3,36,11,Female,No,No,Asian,964
4,5,55.882,4897,357,2,68,16,Male,No,Yes,Caucasian,331


In [5]:
df.shape

(400, 12)

In [6]:
df.isnull().sum()

ID           0
Income       0
Limit        0
Rating       0
Cards        0
Age          0
Education    0
Gender       0
Student      0
Married      0
Ethnicity    0
Balance      0
dtype: int64

In [7]:
df.corr()['Income']

ID           0.037203
Income       1.000000
Limit        0.792088
Rating       0.791378
Cards       -0.018273
Age          0.175338
Education   -0.027692
Balance      0.463656
Name: Income, dtype: float64

In [8]:
from sklearn.preprocessing import LabelEncoder
df['Gender']= LabelEncoder().fit_transform(df['Gender'])
df['Student']= LabelEncoder().fit_transform(df['Student'])
df['Married']= LabelEncoder().fit_transform(df['Married'])
df['Ethnicity']= LabelEncoder().fit_transform(df['Ethnicity'])
df.head()

Unnamed: 0,ID,Income,Limit,Rating,Cards,Age,Education,Gender,Student,Married,Ethnicity,Balance
0,1,14.891,3606,283,2,34,11,0,0,1,2,333
1,2,106.025,6645,483,3,82,15,1,1,1,1,903
2,3,104.593,7075,514,4,71,11,0,0,0,1,580
3,4,148.924,9504,681,3,36,11,1,0,0,1,964
4,5,55.882,4897,357,2,68,16,0,0,1,2,331


In [9]:
X = df[df.columns[~df.columns.isin(['Income','ID'])]]
y = df[["Income"]]
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1234)


In [10]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(320, 10)
(80, 10)
(320, 1)
(80, 1)


In [11]:
from sklearn.linear_model import LinearRegression,Lasso,Ridge,ElasticNet
from sklearn.metrics import mean_squared_error,r2_score

### Linear Regression

In [12]:
lr = LinearRegression()
lr.fit(X_train, y_train)
y_pred= lr.predict(X_test)
print("RMSE:",np.sqrt(mean_squared_error(y_test,y_pred))) 
print("R2 SCORE:",r2_score(y_test, y_pred))

RMSE: 10.124098245348517
R2 SCORE: 0.9296125835084288


### Ridge Regression

In [13]:
from sklearn.model_selection import GridSearchCV

In [14]:
#RIDGE
rr = Ridge()
#parameters = {"alpha": range(0,10000,1)} #alpha is comming to lowest value
parameters = {"alpha":[0,0.1,0.2,0.3,0.4,0.5]}
model = GridSearchCV(rr,parameters,scoring="neg_mean_squared_error",cv=5)

model.fit(X,y)

model.best_params_

{'alpha': 0}

In [15]:
rr = Ridge(alpha=0)
rr.fit(X_train, y_train) 
y_pred= rr.predict(X_test)
print("RMSE:",np.sqrt(mean_squared_error(y_test,y_pred))) 
print("R2 SCORE:",r2_score(y_test, y_pred))

RMSE: 10.12409824534849
R2 SCORE: 0.9296125835084291


### Lasso Regression

In [16]:
#LASSO
lar=Lasso()
#parameters = {"alpha": range(0,10000,1)}
parameters = {"alpha":[0,0.1,0.2,0.3,0.4,0.5]}
model = GridSearchCV(lar,parameters,scoring="neg_mean_squared_error",cv=5)

###xtrain can be also given
model.fit(X,y)

model.best_params_

{'alpha': 0.1}

In [17]:
lar = Lasso(alpha=0.01)
lar.fit(X_train, y_train) 
y_pred= lar.predict(X_test)
print("RMSE:",np.sqrt(mean_squared_error(y_test,y_pred))) 
print("R2 SCORE:",r2_score(y_test, y_pred))

RMSE: 10.125985016542517
R2 SCORE: 0.9295863456506576


### ElasticNet Regression

In [None]:
#LASSO
enr = ElasticNet()
parameters = {"alpha": range(0,10000,1)}
#parameters = {"alpha":[0,0.1,0.2,0.3,0.4,0.5]}
model = GridSearchCV(lar,parameters,scoring="neg_mean_squared_error",cv=5)

###xtrain can be also given
model.fit(X,y)

model.best_params_

In [None]:
enr = ElasticNet(alpha = 0.01)
enr.fit(X_train, y_train) 
y_pred= enr.predict(X_test)
print("RMSE:",np.sqrt(mean_squared_error(y_test,y_pred))) 
print("R2 SCORE:",r2_score(y_test, y_pred))