In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Read the customer CSV into a DataFrame. The path points into a mounted
# Google Drive folder, so the drive must be mounted before this runs.
file_path = '/content/drive/MyDrive/customers-100.csv'
df = pd.read_csv(file_path)

# Show a heading followed by the first rows (DataFrame.head default) so the
# column layout can be checked by eye; sep="\n" puts the table on its own line.
print("Dataset Preview:", df.head(), sep="\n")

# ---------------------------------------------------------------
# Regression Task
# Fit a linear model that predicts 'Index' from one-hot encoded
# 'Customer Id' values, then report MSE and R² on a held-out split.
# ---------------------------------------------------------------
print("\nRegression Task:")

# Features: dummy columns built from the customer id (cast to str first);
# drop_first=True omits the first category's column.
# Target: the 'Index' column.
# NOTE(review): the preview shows a different 'Customer Id' on every row. If
# ids are unique, each dummy column marks a single row, so held-out rows are
# described only by columns that were all-zero during training -- confirm this
# is acceptable for the demonstration.
X_reg = pd.get_dummies(data=df['Customer Id'].astype(str), drop_first=True)
y_reg = df['Index']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg,
    y_reg,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training are chained.
linear_regressor = LinearRegression().fit(X_train_reg, y_train_reg)

# Predictions for the held-out rows.
y_pred_reg = linear_regressor.predict(X_test_reg)

# Report error and goodness of fit on the held-out rows.
print(
    "Mean Squared Error (MSE):",
    mean_squared_error(y_true=y_test_reg, y_pred=y_pred_reg),
)
print("R² Score:", r2_score(y_true=y_test_reg, y_pred=y_pred_reg))

# ---------------------------------------------------------------
# Classification Task
# Train a random forest on one-hot encoded 'City' and 'Country'
# to predict the parity of 'Index', then report accuracy and the
# per-class classification report on a held-out split.
# ---------------------------------------------------------------
print("\nClassification Task:")

# Features: dummy columns for the two location fields (first category of each
# is dropped). Target: 'Index' modulo 2 -- an artificial binary label used
# purely for demonstration.
X_clf = pd.get_dummies(data=df.loc[:, ['City', 'Country']], drop_first=True)
y_clf = df['Index'].mod(2)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train_clf, X_test_clf, y_train_clf, y_test_clf = train_test_split(
    X_clf,
    y_clf,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training are chained.
clf = RandomForestClassifier(random_state=42).fit(X_train_clf, y_train_clf)

# Predicted labels for the held-out rows.
y_pred_clf = clf.predict(X_test_clf)

# Overall accuracy, followed by the per-class report.
print("Accuracy:", accuracy_score(y_true=y_test_clf, y_pred=y_pred_clf))
print(classification_report(y_true=y_test_clf, y_pred=y_pred_clf))


Dataset Preview:
   Index      Customer Id First Name Last Name  \
0      1  DD37Cf93aecA6Dc     Sheryl    Baxter   
1      2  1Ef7b82A4CAAD10    Preston    Lozano   
2      3  6F94879bDAfE5a6        Roy     Berry   
3      4  5Cef8BFA16c5e3c      Linda     Olsen   
4      5  053d585Ab6b3159     Joanna    Bender   

                           Company               City  \
0                  Rasmussen Group       East Leonard   
1                      Vega-Gentry  East Jimmychester   
2                    Murillo-Perry      Isabelborough   
3  Dominguez, Mcmillan and Donovan         Bensonview   
4         Martin, Lang and Andrade     West Priscilla   

                      Country                 Phone 1                Phone 2  \
0                       Chile            229.077.5154       397.884.0519x718   
1                    Djibouti              5153435776       686-620-1820x944   
2         Antigua and Barbuda         +1-539-402-0259    (496)978-3969x58947   
3          Dominica