In [3]:
# Task 01

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report


data = pd.read_csv('heart.csv')
data.info()
data.describe()
print(data.isnull().sum())



X = data.drop('target', axis=1)
y = data['target']


scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

results = []


# L1 regularization

model_l1 = LogisticRegression(penalty='l1', solver='liblinear')

model_l1.fit(X_train, y_train)
train_acc_l1 = accuracy_score(y_train, model_l1.predict(X_train))
test_acc_l1 = accuracy_score(y_test, model_l1.predict(X_test))

results.append({'Penalty': 'L1', 'Training Accuracy': train_acc_l1, 'Testing Accuracy': test_acc_l1})


# L2 regularization

model_l2 = LogisticRegression(penalty='l2', solver='lbfgs')

model_l2.fit(X_train, y_train)
train_acc_l2 = accuracy_score(y_train, model_l2.predict(X_train))
test_acc_l2 = accuracy_score(y_test, model_l2.predict(X_test))

results.append({'Penalty': 'L2', 'Training Accuracy': train_acc_l2, 'Testing Accuracy': test_acc_l2})


# Elastic Net regularization

model_elasticnet = LogisticRegression(penalty='elasticnet', solver='saga', l1_ratio=0.5)

model_elasticnet.fit(X_train, y_train)
train_acc_elasticnet = accuracy_score(y_train, model_elasticnet.predict(X_train))
test_acc_elasticnet = accuracy_score(y_test, model_elasticnet.predict(X_test))
results.append({'Penalty': 'Elastic Net', 'Training Accuracy': train_acc_elasticnet, 'Testing Accuracy': test_acc_elasticnet})


results_df = pd.DataFrame(results)
print("\nAll Training and Testing Accuracies:")
print(results_df)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1025 entries, 0 to 1024
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1025 non-null   int64  
 1   sex       1025 non-null   int64  
 2   cp        1025 non-null   int64  
 3   trestbps  1025 non-null   int64  
 4   chol      1025 non-null   int64  
 5   fbs       1025 non-null   int64  
 6   restecg   1025 non-null   int64  
 7   thalach   1025 non-null   int64  
 8   exang     1025 non-null   int64  
 9   oldpeak   1025 non-null   float64
 10  slope     1025 non-null   int64  
 11  ca        1025 non-null   int64  
 12  thal      1025 non-null   int64  
 13  target    1025 non-null   int64  
dtypes: float64(1), int64(13)
memory usage: 112.2 KB
age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

All Tr

In [5]:
# Task 02


import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

iris = load_iris()

X, y = iris.data, iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

solvers = ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga']
results = []

for s in solvers:
    model = LogisticRegression(solver=s)
    model.fit(X_train, y_train)
    
    train_accuracy = accuracy_score(y_train, model.predict(X_train))
    test_accuracy = accuracy_score(y_test, model.predict(X_test))

    results.append({'Solver': s, 'Training Accuracy': train_accuracy, 'Testing Accuracy': test_accuracy})
    
results_df = pd.DataFrame(results)
print(results_df)


# Testing on heart dataset

from sklearn.preprocessing import StandardScaler

data = pd.read_csv('heart.csv')
X = data.drop('target', axis=1)
y = data['target']


scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

results_heart = []

for s in solvers:
    
    model = LogisticRegression(solver=s, penalty='l2', max_iter=1000)
    model.fit(X_train, y_train)

    train_accuracy = accuracy_score(y_train, model.predict(X_train))
    test_accuracy = accuracy_score(y_test, model.predict(X_test))

    results_heart.append({'Solver': s, 'Training Accuracy': train_accuracy, 'Testing Accuracy': test_accuracy})


results_heart_df = pd.DataFrame(results_heart)
print(results_heart_df)



            Solver  Training Accuracy  Testing Accuracy
0            lbfgs           0.975000               1.0
1        liblinear           0.958333               1.0
2        newton-cg           0.975000               1.0
3  newton-cholesky           0.975000               1.0
4              sag           0.983333               1.0
5             saga           0.975000               1.0
            Solver  Training Accuracy  Testing Accuracy
0            lbfgs           0.871951          0.795122
1        liblinear           0.871951          0.795122
2        newton-cg           0.871951          0.795122
3  newton-cholesky           0.871951          0.795122
4              sag           0.871951          0.795122
5             saga           0.871951          0.795122




In [6]:
# Task 03


import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score, classification_report

iris = load_iris()
X, y = iris.data, iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Logistic Model
lr_model = LogisticRegression(solver='lbfgs')
lr_model.fit(X_train, y_train)


lr_train_acc = accuracy_score(y_train, lr_model.predict(X_train))
lr_test_acc = accuracy_score(y_test, lr_model.predict(X_test))
print("Logistic Regression Result:")
print(f"Training Accuracy: {lr_train_acc:}")
print(f"Testing Accuracy: {lr_test_acc:}")

# Perceptron Model
perc_model = Perceptron(max_iter=1000, random_state=42)
perc_model.fit(X_train, y_train)

perc_train_acc = accuracy_score(y_train, perc_model.predict(X_train))
perc_test_acc = accuracy_score(y_test, perc_model.predict(X_test))
print("\nPerceptron Result:")
print(f"Training Accuracy: {perc_train_acc:}")
print(f"Testing Accuracy: {perc_test_acc:}")




Logistic Regression Result:
Training Accuracy: 0.975
Testing Accuracy: 1.0

Perceptron Result:
Training Accuracy: 0.675
Testing Accuracy: 0.6333333333333333


In [8]:
# Task 04

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


data = pd.read_csv('fraud_detection.csv')

data.info()
data.head()
data.describe()


data = data.drop(['Credit_card_number', 'Expiry'], axis=1)

label_encoder = LabelEncoder()
data['Profession'] = label_encoder.fit_transform(data['Profession'])

X = data.drop('Fraud', axis=1)
y = data['Fraud']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)



model = Sequential()
model.add(Dense(128, activation='relu', input_dim=X_train.shape[1])) # Hidden Layer 1
model.add(Dense(64, activation='tanh')) # Hidden Layer 2
model.add(Dense(32, activation='relu')) # Hidden Layer 3
model.add(Dense(1, activation='sigmoid')) # Output Layer (Binary)

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=1)


print("\nModel Evaluation on Test Data:")
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")


y_pred = (model.predict(X_test) > 0.5).astype("int32")

accuracy = accuracy_score(y_test,y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)


print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   Profession          10000 non-null  object
 1   Income              10000 non-null  int64 
 2   Credit_card_number  10000 non-null  int64 
 3   Expiry              10000 non-null  object
 4   Security_code       10000 non-null  int64 
 5   Fraud               10000 non-null  int64 
dtypes: int64(4), object(2)
memory usage: 468.9+ KB


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.4981 - loss: 0.6960 - val_accuracy: 0.5119 - val_loss: 0.6940
Epoch 2/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5113 - loss: 0.6944 - val_accuracy: 0.4944 - val_loss: 0.6942
Epoch 3/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5194 - loss: 0.6928 - val_accuracy: 0.5125 - val_loss: 0.6930
Epoch 4/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5133 - loss: 0.6933 - val_accuracy: 0.5150 - val_loss: 0.6935
Epoch 5/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5153 - loss: 0.6924 - val_accuracy: 0.5013 - val_loss: 0.6931
Epoch 6/50
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5194 - loss: 0.6922 - val_accuracy: 0.4950 - val_loss: 0.6947
Epoch 7/50
[1m200/200[0m 

In [9]:
# Task 05
# Dataset 1


import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical


wine_data = pd.read_csv("WineQT.csv")

wine_data.info()
wine_data.head()
wine_data.describe()

wine_data = wine_data.drop(columns=["Id"]) 
wine_features = wine_data.drop(columns=["quality"])
wine_target = wine_data["quality"]

# One hot encoding for multi class classification
wine_target = to_categorical(wine_target)

scaler = StandardScaler()
wine_features = scaler.fit_transform(wine_features)

X_train, X_test, y_train, y_test = train_test_split(wine_features, wine_target, test_size=0.2, random_state=42)

model = Sequential()
model.add(Dense(128, activation='relu', input_dim=X_train.shape[1]))  # Hidden Layer 1
model.add(Dense(64, activation='tanh'))  # Hidden Layer 2
model.add(Dense(32, activation='relu'))  # Hidden Layer 3
model.add(Dense(y_train.shape[1], activation='softmax'))  # Output Layer (Multi-class)


model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=1)

print("\nModel Evaluation on Test Data:")
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

y_pred_probs = model.predict(X_test)
y_pred = y_pred_probs.argmax(axis=1)
y_test_classes = y_test.argmax(axis=1)

accuracy = accuracy_score(y_test_classes, y_pred)
precision = precision_score(y_test_classes, y_pred, average='weighted')
recall = recall_score(y_test_classes, y_pred, average='weighted')
f1 = f1_score(y_test_classes, y_pred, average='weighted')

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1143 entries, 0 to 1142
Data columns (total 13 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   fixed acidity         1143 non-null   float64
 1   volatile acidity      1143 non-null   float64
 2   citric acid           1143 non-null   float64
 3   residual sugar        1143 non-null   float64
 4   chlorides             1143 non-null   float64
 5   free sulfur dioxide   1143 non-null   float64
 6   total sulfur dioxide  1143 non-null   float64
 7   density               1143 non-null   float64
 8   pH                    1143 non-null   float64
 9   sulphates             1143 non-null   float64
 10  alcohol               1143 non-null   float64
 11  quality               1143 non-null   int64  
 12  Id                    1143 non-null   int64  
dtypes: float64(11), int64(2)
memory usage: 116.2 KB
Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - accuracy: 0.4036 - loss: 1.7313 - val_accuracy: 0.5355 - val_loss: 1.3197
Epoch 2/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.5581 - loss: 1.1741 - val_accuracy: 0.5027 - val_loss: 1.1235
Epoch 3/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.5964 - loss: 1.0414 - val_accuracy: 0.5956 - val_loss: 1.0208
Epoch 4/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6142 - loss: 0.9924 - val_accuracy: 0.5246 - val_loss: 1.0359
Epoch 5/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.6115 - loss: 0.9651 - val_accuracy: 0.5738 - val_loss: 0.9953
Epoch 6/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6361 - loss: 0.9423 - val_accuracy: 0.5683 - val_loss: 0.9829
Epoch 7/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [10]:
# Task 05
# Dataset 2

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

data = pd.read_csv("data.csv")

data.info()
data.head()
data.describe()

data = data.drop(columns=["date", "street", "city", "statezip", "country"])

data_features = data.drop(columns=["price"])
data_target = data["price"]

scaler = StandardScaler()
data_features = scaler.fit_transform(data_features)

X_train, X_test, y_train, y_test = train_test_split(data_features, data_target, test_size=0.2, random_state=42)


model = Sequential()
model.add(Dense(128, activation='relu', input_dim=X_train.shape[1]))  # Hidden Layer 1
model.add(Dense(64, activation='tanh'))  # Hidden Layer 2
model.add(Dense(32, activation='relu'))  # Hidden Layer 3
model.add(Dense(1, activation='linear'))  # Output Layer for Regression

model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_squared_error'])

model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=1)

print("\nModel Evaluation on Test Data:")
test_loss, test_mse = model.evaluate(X_test, y_test)
print(f"Test Loss (MSE): {test_loss}")

y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error (MSE): {mse}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"R2 Score: {r2}")



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4600 entries, 0 to 4599
Data columns (total 18 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   date           4600 non-null   object 
 1   price          4600 non-null   float64
 2   bedrooms       4600 non-null   float64
 3   bathrooms      4600 non-null   float64
 4   sqft_living    4600 non-null   int64  
 5   sqft_lot       4600 non-null   int64  
 6   floors         4600 non-null   float64
 7   waterfront     4600 non-null   int64  
 8   view           4600 non-null   int64  
 9   condition      4600 non-null   int64  
 10  sqft_above     4600 non-null   int64  
 11  sqft_basement  4600 non-null   int64  
 12  yr_built       4600 non-null   int64  
 13  yr_renovated   4600 non-null   int64  
 14  street         4600 non-null   object 
 15  city           4600 non-null   object 
 16  statezip       4600 non-null   object 
 17  country        4600 non-null   object 
dtypes: float

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 448288882688.0000 - mean_squared_error: 448288882688.0000 - val_loss: 401475076096.0000 - val_mean_squared_error: 401475076096.0000
Epoch 2/50
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 448226361344.0000 - mean_squared_error: 448226361344.0000 - val_loss: 401394925568.0000 - val_mean_squared_error: 401394925568.0000
Epoch 3/50
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 448126779392.0000 - mean_squared_error: 448126779392.0000 - val_loss: 401278402560.0000 - val_mean_squared_error: 401278402560.0000
Epoch 4/50
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 447987580928.0000 - mean_squared_error: 447987580928.0000 - val_loss: 401121247232.0000 - val_mean_squared_error: 401121247232.0000
Epoch 5/50
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 447807651840.0000 - mea