In [28]:
# ქვიზი 5 - 6
# გიორგი გაჯიშვილი

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset
file_path = 'spotify_data.csv'
spotify_data = pd.read_csv(file_path, encoding='latin1')

In [21]:
#1 Simple Linear Regression

data = spotify_data[['Spotify Streams', 'Track Score']].dropna()
data['Spotify Streams'] = data['Spotify Streams'].str.replace(',', '').astype(float)

X = data[['Track Score']]
y = data['Spotify Streams']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")

new_data = pd.DataFrame({'Track Score': [500, 600, 700]})
new_predictions = model.predict(new_data)
print("Predictions on new data:", new_predictions)

Mean Squared Error: 2.5295566486403574e+17
R^2 Score: 0.0830713124670256
Predictions on new data: [1.95808652e+09 2.28837274e+09 2.61865897e+09]


In [22]:
#2. Multiple Linear Regression

data = spotify_data[['Spotify Streams', 'Track Score', 'Spotify Playlist Count', 'Shazam Counts']].dropna()
data['Spotify Streams'] = data['Spotify Streams'].str.replace(',', '').astype(float)
data['Spotify Playlist Count'] = data['Spotify Playlist Count'].str.replace(',', '').astype(float)
data['Shazam Counts'] = data['Shazam Counts'].str.replace(',', '').astype(float)

X = data[['Track Score', 'Spotify Playlist Count', 'Shazam Counts']]
y = data['Spotify Streams']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")

new_data = pd.DataFrame({'Track Score': [500, 600, 700], 'Spotify Playlist Count': [30000, 40000, 50000], 'Shazam Counts': [2000000, 3000000, 4000000]})
new_predictions = model.predict(new_data)
print("Predictions on new data:", new_predictions)


Mean Squared Error: 5.820821480226038e+16
R^2 Score: 0.8096848753282524
Predictions on new data: [6.81441401e+08 8.55127892e+08 1.02881438e+09]


In [23]:
#3. ხის რეგრესიის მოდელი

from sklearn.tree import DecisionTreeRegressor

data = spotify_data[['Spotify Streams', 'Track Score']].dropna() 
data['Spotify Streams'] = data['Spotify Streams'].str.replace(',', '').astype(float)

X = data[['Track Score']]
y = data['Spotify Streams']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = DecisionTreeRegressor()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")

new_data = pd.DataFrame({'Track Score': [500, 600, 700]})
new_predictions = model.predict(new_data)
print("Predictions on new data:", new_predictions)


Mean Squared Error: 3.314052984969807e+17
R^2 Score: -0.2012975694995549
Predictions on new data: [6.01309283e+08 3.23703884e+08 3.90470936e+08]


In [24]:
#4. ლოჯისტიკური რეგრესიის მოდელი

from sklearn.linear_model import LogisticRegression

data = spotify_data[['Explicit Track', 'Track Score', 'Spotify Streams']].dropna()
data['Spotify Streams'] = data['Spotify Streams'].str.replace(',', '').astype(float)
data['Explicit Track'] = data['Explicit Track'].astype(int)

X = data[['Track Score', 'Spotify Streams']]
y = data['Explicit Track']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

accuracy = model.score(X_test, y_test)

print(f"Accuracy: {accuracy}")

new_data = pd.DataFrame({'Track Score': [500, 600, 700], 'Spotify Streams': [300000000, 400000000, 500000000]})
new_predictions = model.predict(new_data)
print("Predictions on new data:", new_predictions)


Accuracy: 0.6191536748329621
Predictions on new data: [0 0 0]


In [25]:
#5. გადაწყვეტილების ხის კლასიფიკაციის მოდელი

from sklearn.tree import DecisionTreeClassifier

data = spotify_data[['Explicit Track', 'Track Score', 'Spotify Streams']].dropna()
data['Spotify Streams'] = data['Spotify Streams'].str.replace(',', '').astype(float)
data['Explicit Track'] = data['Explicit Track'].astype(int)

X = data[['Track Score', 'Spotify Streams']]
y = data['Explicit Track']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = DecisionTreeClassifier()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

accuracy = model.score(X_test, y_test)

print(f"Accuracy: {accuracy}")

new_data = pd.DataFrame({'Track Score': [500, 600, 700], 'Spotify Streams': [300000000, 400000000, 500000000]})
new_predictions = model.predict(new_data)
print("Predictions on new data:", new_predictions)


Accuracy: 0.5590200445434298
Predictions on new data: [1 0 0]
