#**Import Libraries**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer

#**Load dataset**

In [None]:
# Read the raw dataset from 'data.csv' (a path relative to the working directory).
data = pd.read_csv('data.csv')

In [None]:
# Summarise column names, non-null counts and dtypes to spot missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   id                100 non-null    object 
 1   name              100 non-null    object 
 2   duration          100 non-null    float64
 3   energy            100 non-null    float64
 4   key               100 non-null    int64  
 5   loudness          100 non-null    float64
 6   mode              100 non-null    int64  
 7   speechiness       100 non-null    float64
 8   acousticness      100 non-null    float64
 9   instrumentalness  100 non-null    float64
 10  liveness          100 non-null    float64
 11  valence           100 non-null    float64
 12  tempo             100 non-null    float64
 13  danceability      100 non-null    float64
 14  repeated_plays    100 non-null    int64  
 15  timestamp         100 non-null    object 
dtypes: float64(10), int64(3), object(3)
memory us

#**Cleaning dataset**

In [None]:
# Remove the identifier and timestamp columns so they are not used as model inputs.
# `errors='ignore'` keeps this cell safe to re-run: `data` is rebound here, so a
# second execution would otherwise raise KeyError because the columns are
# already gone.
data = data.drop(columns=['id', 'timestamp'], errors='ignore')
data

Unnamed: 0,name,duration,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,danceability,repeated_plays
0,Good 4 U Olivia Rodrigo,2.97,0.664,9,-5.044,1,0.1540,0.33500,0.000,0.0849,0.688,166.928,0.563,1
1,Stay The Kid LAROI & Justin Bieber,2.30,0.506,8,-11.275,1,0.0589,0.37900,0.868,0.1100,0.454,170.054,0.564,0
2,Levitating Dua Lipa feat. DaBaby,3.38,0.825,6,-3.787,0,0.0601,0.00883,0.000,0.0674,0.915,102.977,0.702,1
3,Peaches Justin Bieber feat. Daniel Caesar & Gi...,3.30,0.696,0,-6.181,1,0.1190,0.32100,0.000,0.4200,0.464,90.030,0.677,0
4,Montero (Call Me By Your Name) Lil Nas X,2.30,0.503,8,-6.725,0,0.2200,0.29300,0.000,0.4050,0.710,178.781,0.593,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Peaches (feat. Daniel Caesar & Giveon) Justin ...,3.30,0.696,0,-6.181,1,0.1190,0.32100,0.000,0.4200,0.464,90.030,0.677,0
96,Dance With Me Tonight Olly Murs,3.37,0.748,11,-5.922,0,0.0589,0.30500,0.000,0.0811,0.964,163.984,0.672,0
97,Therefore I Am Billie Eilish,2.91,0.340,11,-7.773,0,0.0697,0.21800,0.130,0.0550,0.716,94.009,0.889,2
98,Stitches Shawn Mendes,3.45,0.754,1,-6.684,1,0.0615,0.01510,0.000,0.0486,0.755,149.789,0.752,1


In [None]:
# Check the frame's (rows, columns) dimensions after the column drop.
data.shape

(100, 14)

#**Encoding**

In [None]:
# Turn the song titles in `name` into integer codes so the column is numeric.
# NOTE(review): these codes are identifiers for titles, yet the column is later
# fed to the model as an ordinary numeric feature — confirm this is intended.
lb = LabelEncoder().fit(data['name'])
data['name'] = lb.transform(data['name'])

In [None]:
# Display the frame to check that `name` now holds integer codes.
data

Unnamed: 0,name,duration,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,danceability,repeated_plays
0,27,2.97,0.664,9,-5.044,1,0.1540,0.33500,0.000,0.0849,0.688,166.928,0.563,1
1,67,2.30,0.506,8,-11.275,1,0.0589,0.37900,0.868,0.1100,0.454,170.054,0.564,0
2,44,3.38,0.825,6,-3.787,0,0.0601,0.00883,0.000,0.0674,0.915,102.977,0.702,1
3,55,3.30,0.696,0,-6.181,1,0.1190,0.32100,0.000,0.4200,0.464,90.030,0.677,0
4,47,2.30,0.503,8,-6.725,0,0.2200,0.29300,0.000,0.4050,0.710,178.781,0.593,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,54,3.30,0.696,0,-6.181,1,0.1190,0.32100,0.000,0.4200,0.464,90.030,0.677,0
96,14,3.37,0.748,11,-5.922,0,0.0589,0.30500,0.000,0.0811,0.964,163.984,0.672,0
97,76,2.91,0.340,11,-7.773,0,0.0697,0.21800,0.130,0.0550,0.716,94.009,0.889,2
98,68,3.45,0.754,1,-6.684,1,0.0615,0.01510,0.000,0.0486,0.755,149.789,0.752,1


#**Access dataset**

In [None]:
# Every column except the last is a model input (x); the last column is the label (y).
x, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

In [None]:
# Fill missing feature values with the mean of their column.
# Missing entries are identified as NaN: 0 is a legitimate value in this data
# (for example in `key`, `mode` and `instrumentalness`), so treating 0 as
# "missing" would overwrite real observations with the column mean.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
# `impute` holds the imputed feature matrix (the fitted estimator stays
# available separately as `imputer`).
impute = imputer.fit_transform(x)
# Feed the imputed matrix into the following steps so the imputation actually
# takes effect on the features used for scaling and training.
x = impute

In [None]:
# Display the feature matrix returned by the imputer.
impute

array([[ 27.   ,   2.97 ,   0.664, ...,   0.688, 166.928,   0.563],
       [ 67.   ,   2.3  ,   0.506, ...,   0.454, 170.054,   0.564],
       [ 44.   ,   3.38 ,   0.825, ...,   0.915, 102.977,   0.702],
       ...,
       [ 76.   ,   2.91 ,   0.34 , ...,   0.716,  94.009,   0.889],
       [ 68.   ,   3.45 ,   0.754, ...,   0.755, 149.789,   0.752],
       [ 47.   ,   2.3  ,   0.503, ...,   0.71 , 178.781,   0.593]])

#**Scaling dataset**

In [None]:
# Standardise the features; `sc` is kept because it is reused later to
# transform a hand-written sample before prediction.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test rows influence the scaling statistics — consider fitting on the
# training rows only.
sc = StandardScaler()
x = sc.fit(x).transform(x)
x.shape

(100, 13)

#**Split dataset**

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

#**Logistic Regression Model**

In [None]:
# Fit a logistic-regression classifier on the training split.
# C=10 is presumably a hand-picked regularisation setting — TODO confirm.
classifier = LogisticRegression(random_state=0, C=10)
classifier.fit(X=x_train, y=y_train)

#**Predict new result**

In [None]:
# Score one hand-written sample, scaled with the same fitted scaler as the
# training data. The values are close to row 0 of the dataset, but the last
# four features (liveness, valence, tempo, danceability) are altered/rounded.
y_predict = classifier.predict(
    sc.transform([
        [27, 2.97, 0.664, 9, -5.044, 1, 0.154, 0.335, 0, 0.09, 0.7, 166.1, 0.6],
    ])
)
y_predict

array([1])

#**Predict test set**

In [None]:
# Predict the held-out rows and print predictions (left column) next to the
# true labels (right column).
test_predict = classifier.predict(x_test)
print(np.column_stack((test_predict, y_test)))

[[0 0]
 [0 1]
 [0 1]
 [0 0]
 [1 1]
 [1 1]
 [0 1]
 [0 1]
 [1 0]
 [1 0]
 [0 0]
 [0 1]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 1]
 [0 0]
 [0 0]
 [0 1]]


#**Confusion Matrix and Accuracy**

In [None]:
# Confusion matrix (rows: true labels, columns: predictions) followed by the
# overall accuracy on the test split.
cm = confusion_matrix(y_true=y_test, y_pred=test_predict)
print(cm)
accuracy_score(y_true=y_test, y_pred=test_predict)

[[8 2]
 [7 3]]


0.55