In [1]:
import pandas as pd
df = pd.read_csv("../data/mobile_new.csv")

In [2]:
df.head(2)

Unnamed: 0,screen_size,rear_camera_mp,front_camera_mp,ram,battery,weight,normalized_used_price
0,14.5,13.0,5.0,3.0,3020.0,146.0,4.307572
1,17.3,13.0,16.0,8.0,4300.0,213.0,5.162097


### R Squared

We can build models with increasing number of predictors and see what is the effect of this on R Squared.

- Model 1: $price = \beta_0+\beta_1ScreenSize$
- Model 2: $price = \beta_0+\beta_1ScreenSize+\beta_2RearCameraMP$
- Model 3: $price = \beta_0+\beta_1ScreenSize+\beta_2RearCameraMP+\beta_4FrontCameraMP$
- Model 4: $price = \beta_0+\beta_1ScreenSize+\beta_2RearCameraMP+\beta_4FrontCameraMP+\beta_5Ram$
- Model 5: $price = \beta_0+\beta_1ScreenSize+\beta_2RearCameraMP+\beta_4FrontCameraMP+\beta_5Ram+\beta_6Battery$
- Model 6: $price = \beta_0+\beta_1ScreenSize+\beta_2RearCameraMP+\beta_4FrontCameraMP+\beta_5Ram+\beta_6Battery\beta_7Weight$

In [3]:
from sklearn import linear_model
from sklearn.metrics import r2_score
reg = linear_model.LinearRegression()

In [4]:
## Model 1
X = df[['screen_size']].values
y = df['normalized_used_price'].values

In [5]:
m1 = reg.fit(X,y)

In [6]:
preds = m1.predict(X)
r2_score(y,preds)

0.37271873646115306

In [7]:
config_m1 = {'r':r2_score(y,preds),'n':len(X),'p':1}

In [8]:
## Model 2
X = df[['screen_size','rear_camera_mp']].values
y = df['normalized_used_price'].values
m2 = reg.fit(X,y)
preds = m2.predict(X)
r2_score(y,preds)

0.624105405744466

In [9]:
config_m2 = {'r':r2_score(y,preds),'n':len(X),'p':2}

In [10]:
## Model 3
X = df[['screen_size','rear_camera_mp','front_camera_mp']].values
y = df['normalized_used_price'].values
m3 = reg.fit(X,y)
preds = m3.predict(X)
r2_score(y,preds)

0.6965978244613198

In [11]:
config_m3 = {'r':r2_score(y,preds),'n':len(X),'p':3}

In [12]:
## Model 4
X = df[['screen_size','rear_camera_mp','front_camera_mp','ram']].values
y = df['normalized_used_price'].values
m4 = reg.fit(X,y)
preds = m4.predict(X)
r2_score(y,preds)

0.7207596508671616

In [13]:
config_m4 = {'r':r2_score(y,preds),'n':len(X),'p':4}

In [14]:
## Model 5
X = df[['screen_size','rear_camera_mp','front_camera_mp','ram','battery']].values
y = df['normalized_used_price'].values
m5 = reg.fit(X,y)
preds = m5.predict(X)
r2_score(y,preds)

0.7227265974524962

In [15]:
config_m5 = {'r':r2_score(y,preds),'n':len(X),'p':5}

In [16]:
## Model 6
X = df[['screen_size','rear_camera_mp','front_camera_mp','ram','battery','weight']].values
y = df['normalized_used_price'].values
m6 = reg.fit(X,y)
preds = m6.predict(X)
r2_score(y,preds)

0.7267105781749856

In [17]:
config_m6 = {'r':r2_score(y,preds),'n':len(X),'p':6}

### Adjusted R Squared

Sklearn doesn't have a native implimentation, but we can always create one as we know

$Adj R^2 = 1-\frac{(1-R^2)(n-1)}{(n-p-1)}$

In [18]:
def adj_r(r,n,p):
    return 1-(1-r*r)*(n-1)/(n-p-1)

In [19]:
adj_r(**config_m1)

0.1386547155741782

In [20]:
adj_r(**config_m2)

0.389132331638948

In [21]:
adj_r(**config_m3)

0.48477381204059555

In [22]:
adj_r(**config_m4)

0.518903446611287

In [23]:
adj_r(**config_m5)

0.5215990895324807

In [24]:
adj_r(**config_m6)

0.5272370796888337

### Summarizing results

In [25]:
pd.DataFrame({'R Squared':[config_m1['r'],
                           config_m2['r'],
                           config_m3['r'],
                           config_m4['r'],
                           config_m5['r'],
                           config_m6['r']],
             'Adj R Squared':[adj_r(**config_m1),
                             adj_r(**config_m2),
                             adj_r(**config_m3),
                             adj_r(**config_m4),
                             adj_r(**config_m5),
                             adj_r(**config_m6)]})

Unnamed: 0,R Squared,Adj R Squared
0,0.372719,0.138655
1,0.624105,0.389132
2,0.696598,0.484774
3,0.72076,0.518903
4,0.722727,0.521599
5,0.726711,0.527237
