In [30]:
#Import Required Libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix



In [31]:

#Load the dataset
# The CSV is hosted on GitHub and uses ';' as its field separator.
url = "https://github.com/mishravipul/data/raw/main/divorce.csv"
df = pd.read_csv(url, sep=';')

# Preview the first five rows to confirm the columns were parsed correctly.
print(df.head())

   Atr1  Atr2  Atr3  Atr4  Atr5  Atr6  Atr7  Atr8  Atr9  Atr10  ...  Atr46  \
0     2     2     4     1     0     0     0     0     0      0  ...      2   
1     4     4     4     4     4     0     0     4     4      4  ...      2   
2     2     2     2     2     1     3     2     1     1      2  ...      3   
3     3     2     3     2     3     3     3     3     3      3  ...      2   
4     2     2     1     1     1     1     0     0     0      0  ...      2   

   Atr47  Atr48  Atr49  Atr50  Atr51  Atr52  Atr53  Atr54  Class  
0      1      3      3      3      2      3      2      1      1  
1      2      3      4      4      4      4      2      2      1  
2      2      3      1      1      1      2      2      2      1  
3      2      3      3      3      3      2      2      2      1  
4      1      2      3      2      2      2      1      0      1  

[5 rows x 55 columns]


In [32]:
#Add a bias column
# A constant column of ones lets the model learn an intercept as an ordinary weight.
# DataFrame.insert raises ValueError when the column already exists, so the
# guard keeps this cell safe to re-run without restarting the kernel.
if 'Bias' not in df.columns:
    df.insert(0,'Bias',1)
print(df.head())

   Bias  Atr1  Atr2  Atr3  Atr4  Atr5  Atr6  Atr7  Atr8  Atr9  ...  Atr46  \
0     1     2     2     4     1     0     0     0     0     0  ...      2   
1     1     4     4     4     4     4     0     0     4     4  ...      2   
2     1     2     2     2     2     1     3     2     1     1  ...      3   
3     1     3     2     3     2     3     3     3     3     3  ...      2   
4     1     2     2     1     1     1     1     0     0     0  ...      2   

   Atr47  Atr48  Atr49  Atr50  Atr51  Atr52  Atr53  Atr54  Class  
0      1      3      3      3      2      3      2      1      1  
1      2      3      4      4      4      4      2      2      1  
2      2      3      1      1      1      2      2      2      1  
3      2      3      3      3      3      2      2      2      1  
4      1      2      3      2      2      2      1      0      1  

[5 rows x 56 columns]


In [33]:
#Define X and y
# Every column except the last one is a model input; the last column is the label.
X = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy()

#print the shape and datatypes of X and y
print("Shape of X: ", X.shape)
print("Shape of y: ", y.shape)
print("Datatype of X: ", X.dtype)
print("Datatype of y: ", y.dtype)


Shape of X:  (170, 55)
Shape of y:  (170,)
Datatype of X:  int64
Datatype of y:  int64


In [34]:
#Split the Dataset into training and Testing Sets
# 15% of the rows are held out for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=42,
)

#print the shape of the training and testing sets
print("Shape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of y_test:", y_test.shape)

Shape of X_train: (144, 55)
Shape of X_test: (26, 55)
Shape of y_train: (144,)
Shape of y_test: (26,)


In [39]:
class LogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    The model does not add an intercept on its own; include a constant
    column in ``X`` if a bias term is wanted.

    Parameters
    ----------
    learning_rate : float, default=0.01
        Step size applied to every gradient update.
    max_iter : int, default=1000
        Number of gradient-descent iterations performed by ``fit``.

    Attributes
    ----------
    weights : numpy.ndarray or None
        Coefficient vector of shape ``(n_features,)``; ``None`` until
        ``fit`` has been called.
    """

    def __init__(self, learning_rate=0.01, max_iter=1000):
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.weights = None

    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

        Evaluated as ``exp(-log(1 + exp(-z)))`` via ``np.logaddexp`` so that
        large negative ``z`` cannot overflow ``exp``.
        """
        return np.exp(-np.logaddexp(0, -z))

    def log_likelihood(self, X, y):
        """Return the *negative* log-likelihood of labels ``y`` given ``X``.

        Despite the method name, the value returned is the loss that gradient
        descent minimises (lower is better); the sign is kept as-is for
        backward compatibility.

        The sum is computed as ``log(1 + exp(z)) - y * z``, which is
        algebraically equal to ``-(y*log(p) + (1-y)*log(1-p))`` but stays
        finite when the predicted probabilities saturate at 0 or 1.
        """
        z = np.dot(X, self.weights)
        return np.sum(np.logaddexp(0, z) - y * z)

    def fit(self, X, y):
        """Learn ``weights`` from training data by gradient descent.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training inputs.
        y : array-like with n_samples elements
            Binary labels (0 or 1). Column vectors are flattened.

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional or ``y`` does not have one label
            per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten so an (n, 1) label array cannot broadcast the residual
        # ``predictions - y`` into an (n, n) matrix.
        y = np.asarray(y, dtype=float).ravel()
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError("X and y must contain the same number of samples")

        self.weights = np.zeros(n_features)

        for _ in range(self.max_iter):
            predictions = self.sigmoid(np.dot(X, self.weights))
            # Gradient of the mean negative log-likelihood w.r.t. the weights.
            gradient = np.dot(X.T, predictions - y) / n_samples
            self.weights -= self.learning_rate * gradient

    def predict_proba(self, X):
        """Return the predicted probability of class 1 for each row of ``X``."""
        z = np.dot(X, self.weights)
        return self.sigmoid(z)

    def predict(self, X, threshold=0.5):
        """Return 0/1 labels: 1 where the predicted probability is >= ``threshold``."""
        return (self.predict_proba(X) >= threshold).astype(int)

In [40]:
#Train the data
# Fit the hand-written logistic regression on the unscaled training split.
model = LogisticRegression(
    learning_rate=0.01,
    max_iter=1000,
)
model.fit(X=X_train, y=y_train)



In [41]:
#Calculate accuracy
def calculate_accuracy(y_true, y_pred):
    """Return the accuracy of ``y_pred`` against ``y_true``.

    Thin wrapper that delegates to ``accuracy_score`` imported from
    ``sklearn.metrics`` and returns its result unchanged.
    """
    score = accuracy_score(y_true, y_pred)
    return score

# Score the training split: predict, then compare with the known labels.
y_train_pred = model.predict(X_train)
train_accuracy = calculate_accuracy(y_train, y_train_pred)

# Same for the held-out test split.
y_test_pred = model.predict(X_test)
test_accuracy = calculate_accuracy(y_test, y_test_pred)

print("Training Accuracy:", train_accuracy)
print("Testing Accuracy: ", test_accuracy)

Training Accuracy: 0.9861111111111112
Testing Accuracy:  0.9615384615384616


In [42]:
#Print Classification Metrics
# Per-class precision / recall / F1 on the held-out test predictions.
print(
    "Classification Report( Test Data):",
    classification_report(y_true=y_test, y_pred=y_test_pred),
    sep="\n",
)

# Confusion matrix for the same test predictions.
print(
    "Confusion Matrix (Test Data):",
    confusion_matrix(y_true=y_test, y_pred=y_test_pred),
    sep="\n",
)

Classification Report( Test Data):
              precision    recall  f1-score   support

           0       0.92      1.00      0.96        11
           1       1.00      0.93      0.97        15

    accuracy                           0.96        26
   macro avg       0.96      0.97      0.96        26
weighted avg       0.96      0.96      0.96        26

Confusion Matrix (Test Data):
[[11  0]
 [ 1 14]]


In [43]:
#Fun Activity
# Retrain on standardized features and compare accuracy with the unscaled run.
#
# Column 0 of X is the constant 'Bias' column. Passing it through
# StandardScaler would turn it into all zeros (mean removed, zero variance),
# which silently removes the intercept from the model. Only the real feature
# columns are therefore scaled; the bias column is carried over unchanged.
# NOTE: any output recorded before this fix must be regenerated by re-running the cell.
scaler=StandardScaler()
X_train_scaled=X_train.astype(float)
X_test_scaled=X_test.astype(float)
X_train_scaled[:,1:]=scaler.fit_transform(X_train[:,1:])
# The test split is transformed with the statistics learned from the training split.
X_test_scaled[:,1:]=scaler.transform(X_test[:,1:])

# Fit a separate model with the same hyperparameters so that `model`
# (trained on unscaled data) keeps its weights and earlier cells stay re-runnable.
model_scaled=LogisticRegression(learning_rate=model.learning_rate,max_iter=model.max_iter)
model_scaled.fit(X_train_scaled,y_train)

y_train_pred_scaled=model_scaled.predict(X_train_scaled)
y_test_pred_scaled=model_scaled.predict(X_test_scaled)

train_accuracy_scaled=calculate_accuracy(y_train,y_train_pred_scaled)
test_accuracy_scaled=calculate_accuracy(y_test,y_test_pred_scaled)

print("Train Accuracy (Scaled):",train_accuracy_scaled)
print("Testing Accuracy (Scaled):",test_accuracy_scaled)

Train Accuracy (Scaled): 0.9791666666666666
Testing Accuracy (Scaled): 0.9615384615384616
