In [1]:
import pandas as pd
import warnings
# NOTE(review): with no category/module arguments this filter hides every
# warning for the rest of the session, deprecation notices included.
warnings.filterwarnings('ignore')

In [2]:
# Read the dataset from a CSV path relative to the working directory and
# display the resulting frame.
df = pd.read_csv(filepath_or_buffer="income-age.csv")
df

Unnamed: 0,age,experience,income
0,25,1,30450
1,30,3,35670
2,47,2,31580
3,32,5,40130
4,43,10,47830
5,51,7,41630
6,28,5,41340
7,33,4,37650
8,37,5,40250
9,39,8,45150


In [3]:
# Dimensions of the frame as a (rows, columns) tuple.
df.shape

(20, 3)

In [4]:
# Number of missing values in each column.
df.isna().sum()

age           0
experience    0
income        0
dtype: int64

In [5]:
# Feature matrix: every column except the "income" target.
x = df.drop(columns=["income"])
x

Unnamed: 0,age,experience
0,25,1
1,30,3
2,47,2
3,32,5
4,43,10
5,51,7
6,28,5
7,33,4
8,37,5
9,39,8


In [6]:
# Target vector: the "income" column as a Series.
y = df["income"]
y

0     30450
1     35670
2     31580
3     40130
4     47830
5     41630
6     41340
7     37650
8     40250
9     45150
10    27840
11    46110
12    36720
13    34800
14    51300
15    38900
16    63600
17    30870
18    44190
19    48700
Name: income, dtype: int64

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
from sklearn.linear_model import LinearRegression

In [10]:
# Baseline linear regression fitted on the training split.
model = LinearRegression()
model.fit(x_train, y_train)
model

In [11]:
# Predictions of the baseline model for the held-out rows.
y_pred = model.predict(x_test)
y_pred

array([31093.38107376, 31295.49954076, 40250.46080162, 34897.6958918 ])

In [12]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [13]:
# R2 (coefficient of determination) of the baseline predictions on the test split.
r2_sc = r2_score(y_true=y_test, y_pred=y_pred)

In [14]:
# Report the baseline R2. The label is a plain string literal: it contains no
# placeholders, so no f-prefix is needed (printed output is unchanged).
print("R2 Score = ", r2_sc)

R2 Score =  0.9387098237077887


<h3>OPTIMIZATION</h3>

In [15]:
from sklearn.model_selection import GridSearchCV

In [16]:
# Fresh, unfitted estimator to hand to the grid search. This rebinds `model`,
# so the fitted baseline from the earlier cell is no longer reachable by that name.
model = LinearRegression()
model

In [17]:
# Candidate constructor arguments for the grid search; with two values per
# key there are 2 * 2 * 2 * 2 = 16 combinations.
# NOTE(review): copy_X and n_jobs look like memory/parallelism switches rather
# than settings that change the fitted coefficients -- confirm against the
# scikit-learn docs before reading anything into their "best" values.
param_grid = dict(
    fit_intercept=[True, False],
    copy_X=[True, False],
    n_jobs=[-1, None],
    positive=[False, True],
)

In [18]:
# 5-fold cross-validated search over param_grid, fitted on the training split.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
)
grid_search.fit(x_train, y_train)

In [19]:
# Parameter combination selected by the grid search.
best_params = grid_search.best_params_
print("Best Parameters :", best_params)

Best Parameters : {'copy_X': True, 'fit_intercept': True, 'n_jobs': -1, 'positive': False}


In [20]:
# GridSearchCV refits the winning configuration on the full training split by
# default (refit=True), so reuse that fitted estimator instead of constructing
# and fitting an identical model a second time.
best_model = grid_search.best_estimator_
best_model

In [21]:
# Predictions of the tuned model for the held-out rows (rebinds `y_pred`).
y_pred = best_model.predict(x_test)
y_pred

array([31093.38107376, 31295.49954076, 40250.46080162, 34897.6958918 ])

In [22]:
# Test-split metrics for the tuned model's predictions.
# NOTE(review): mae and mse are not printed in the cells visible here.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2_sc = r2_score(y_true=y_test, y_pred=y_pred)

In [23]:
# Report the tuned model's R2 and the selected parameters. The R2 label is a
# plain string literal: it contains no placeholders, so no f-prefix is needed
# (printed output is unchanged).
print("R2 Score = ", r2_sc)
print("Best Parameters :", best_params)

R2 Score =  0.9387098237077887
Best Parameters : {'copy_X': True, 'fit_intercept': True, 'n_jobs': -1, 'positive': False}
