In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, accuracy_score, classification_report

In [None]:

# Load datasets
# Source CSV locations are named once so the read calls below are self-describing.
# NOTE(review): these are absolute paths (they look like a hosted-notebook
# sample_data directory) — confirm they exist in the runtime you execute in.
IRIS_CSV_PATH = "/content/sample_data/Iris.csv"
CUSTOMER_CSV_PATH = "/content/sample_data/Customer Purchasing Behaviors.csv"

iris_df = pd.read_csv(IRIS_CSV_PATH)
customer_df = pd.read_csv(CUSTOMER_CSV_PATH)

In [None]:
# Quick look at the data
# Take the first rows of each frame up front, then print shapes and previews
# in the same order as before.
customer_preview = customer_df.head()
iris_preview = iris_df.head()

print("Customer data shape:", customer_df.shape)
print("Customer data Frame\n\n", customer_preview, "\n")
print("Iris data Frame\n\n", iris_preview, "\n")
print("Iris data shape:", iris_df.shape)



Customer data shape: (238, 7)
Customer data Frame

    user_id  age  annual_income  purchase_amount  loyalty_score region  \
0        1   25          45000              200            4.5  North   
1        2   34          55000              350            7.0  South   
2        3   45          65000              500            8.0   West   
3        4   22          30000              150            3.0   East   
4        5   29          47000              220            4.8  North   

   purchase_frequency  
0                  12  
1                  18  
2                  22  
3                  10  
4                  13   

Iris data Frame

    Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
0   1            5.1           3.5            1.4           0.2  Iris-setosa
1   2            4.9           3.0            1.4           0.2  Iris-setosa
2   3            4.7           3.2            1.3           0.2  Iris-setosa
3   4            4.6           3.1   

In [None]:
# ---- LINEAR REGRESSION (Customer Purchasing) ----
# Fits a LinearRegression model that predicts `loyalty_score` from the other
# customer columns, then reports RMSE and R² on a held-out 20% test split.
print("\n=== Linear Regression (Customer Loyalty) ===")

# Build the modelling frame as a NEW object rather than mutating `customer_df`.
# The previous in-place drop + rebind made this cell fail with a KeyError when
# re-run (the 'user_id' and 'region' columns were already gone) and destroyed
# the raw frame loaded earlier. Deriving a fresh frame keeps the cell idempotent.
customer_model_df = pd.get_dummies(
    customer_df.drop(columns=['user_id']),  # drop the unnecessary column
    columns=['region'],                     # encode the categorical column 'region'
    drop_first=True,                        # keep k-1 dummy columns for k region levels
)

# Define features and target
X_cust = customer_model_df.drop(columns=['loyalty_score'])
y_cust = customer_model_df['loyalty_score']

# Split data (80-20 split); fixed random_state keeps the split reproducible
X_train_cust, X_test_cust, y_train_cust, y_test_cust = train_test_split(
    X_cust, y_cust, test_size=0.2, random_state=42
)

# Train model
lin_reg = LinearRegression()
lin_reg.fit(X_train_cust, y_train_cust)

# Predictions & Evaluation
y_pred_cust = lin_reg.predict(X_test_cust)
rmse = np.sqrt(mean_squared_error(y_test_cust, y_pred_cust))  # root of the test-set MSE
r2 = lin_reg.score(X_test_cust, y_test_cust)                  # R² on the test split

print(f"Linear Regression RMSE: {rmse:.4f}")
print(f"Linear Regression R² Score: {r2:.4f}")


=== Linear Regression (Customer Loyalty) ===
Linear Regression RMSE: 0.1860
Linear Regression R² Score: 0.9916


In [None]:
# ---- LOGISTIC REGRESSION (Iris) ----
# Fits a LogisticRegression classifier that predicts the encoded `Species`
# label from the remaining Iris columns, then reports accuracy and a
# per-class classification report on a held-out 20% test split.
print("\n=== Logistic Regression (Iris Classification) ===")

# Drop 'Id' column into a NEW frame rather than mutating `iris_df`.
# The previous in-place drop made this cell raise a KeyError when re-run and
# overwrote the original species names in the loaded frame. `drop` returns a
# fresh frame on every run, so encoding into it below is safe and repeatable.
iris_model_df = iris_df.drop(columns=['Id'])

# Encode target variable: species names -> integer class codes.
# The encoder is always fitted on the original string labels, so
# `label_encoder.classes_` keeps the readable names used in the report below.
label_encoder = LabelEncoder()
iris_model_df['Species'] = label_encoder.fit_transform(iris_model_df['Species'])

# Define features and target
X_iris = iris_model_df.drop(columns=['Species'])
y_iris = iris_model_df['Species']

# Split data (80-20 split); fixed random_state keeps the split reproducible
X_train_iris, X_test_iris, y_train_iris, y_test_iris = train_test_split(
    X_iris, y_iris, test_size=0.2, random_state=42
)

# Train model
log_reg = LogisticRegression(max_iter=200, random_state=42)
log_reg.fit(X_train_iris, y_train_iris)

# Predictions & Evaluation
y_pred_iris = log_reg.predict(X_test_iris)
accuracy = accuracy_score(y_test_iris, y_pred_iris)

print(f"Logistic Regression Accuracy: {accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test_iris, y_pred_iris, target_names=label_encoder.classes_))


=== Logistic Regression (Iris Classification) ===
Logistic Regression Accuracy: 1.0000

Classification Report:
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30

