# Linear Regression

#### Importing Libraries

In [25]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

#### Loading the dataset

In [26]:
# Read the Spotify track features from disk; pandas splits on commas by default.
data = pd.read_csv('dataset/spotify_features.csv')

# Preview the first five rows to sanity-check the columns that were read.
data.head(5)

Unnamed: 0,track_popularity,instrumentalness,duration_ms,energy,acousticness,album_month,album_year,danceability,loudness,liveness,genre_code,release_month_code,valence,artist_code,speechiness,tempo
0,67.0,0.00421,162600.0,0.815,0.0724,12,2019,0.726,-4.969,0.357,2,2,0.693,4535,0.106983,99.972
1,70.0,2.3e-05,176616.0,0.931,0.0794,7,2019,0.675,-3.432,0.19031,2,5,0.613,7724,0.0742,124.008
2,60.0,9e-06,169093.0,0.93,0.0287,7,2019,0.718,-3.778,0.204,2,5,0.509838,6862,0.102,121.956
3,62.0,0.0,187675.0,0.856,0.187,7,2019,0.449,-4.788,0.176,2,5,0.152,3635,0.0623,112.648
4,58.0,5e-06,207894.0,0.923,0.146,6,2019,0.679,-6.5,0.124,2,6,0.752,2056,0.181,121.984


#### Data Preprocessing

In [27]:
# Separate the predictors from the regression target.
# 'Unnamed: 0' appears to be a row index that was saved into the CSV (the preview
# shows it counting 0, 1, 2, ...), so it is not a property of a track and is
# excluded from the features. errors='ignore' keeps this cell working when the
# data is loaded without that column.
X = (
    data
    .drop(columns=['track_popularity'])
    .drop(columns=['Unnamed: 0'], errors='ignore')
)
y = data['track_popularity']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f'Training data: {X_train.shape}, {y_train.shape}')
print(f'Test data: {X_test.shape}, {y_test.shape}')

Training data: (15878, 15), (15878,)
Test data: (3970, 15), (3970,)


#### Standardization

In [28]:
# Fit the scaler on the training split only, so no statistics from the test
# split influence the transformation.
scaler = StandardScaler().fit(X_train)

# Apply the same fitted transformation to both splits.
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

#### Linear Regression Model Training

In [29]:
# Fit ordinary least squares on the standardized training features
# (fit returns the estimator itself, so it can be chained onto construction).
model = LinearRegression().fit(X_train_scaled, y_train)

# Predict the target for the held-out, standardized test features.
y_pred = model.predict(X_test_scaled)

#### Model Evaluation

In [30]:
# Score the held-out predictions against the true test targets.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report the scores followed by the fitted parameters, one labelled line each.
report = [
    ("Mean Squared Error:", mse),
    ("R-squared:", r2),
    ("Intercept:", model.intercept_),
    ("Coefficients:", model.coef_),
]
for label, value in report:
    print(label, value)

Mean Squared Error: 530.1929094899106
R-squared: 0.07512857866533673
Intercept: 42.92925339342279
Coefficients: [-2.4144341  -2.6049894  -4.1038759   0.73724545  1.45543052  0.50530644
  0.96773041  3.59751417 -0.66725919  1.08272172  0.33019016  0.37587785
  0.31748703 -0.79841925  0.52449635]
