In [27]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [45]:
df=pd.read_csv("mouse_relationship_dataset.csv")

In [46]:
df.head()

Unnamed: 0,Age,Gender,Usage,Mouse
0,48,Male,Gaming,Corsair Ironclaw Wireless
1,36,Other,Work,Corsair Scimitar Pro RGB
2,44,Male,Work,Corsair Scimitar Pro RGB
3,55,Male,Gaming,Corsair Ironclaw Wireless
4,55,Other,Work,Corsair Scimitar Pro RGB


We know the data is already clean without any null values because we synthesised it 

In [47]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Age     1000 non-null   int64 
 1   Gender  1000 non-null   object
 2   Usage   1000 non-null   object
 3   Mouse   1000 non-null   object
dtypes: int64(1), object(3)
memory usage: 31.4+ KB


In [48]:
# Splitting the columns into dependent and independent deatures
X = df[['Age', 'Gender', 'Usage']] 
y = df['Mouse']

In [49]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [51]:
gender_encoder = LabelEncoder()
usage_encoder = LabelEncoder()
mouse_encoder = LabelEncoder()

# Encode training data (fit_transform)
X_train['Gender'] = gender_encoder.fit_transform(X_train['Gender'])
X_train['Usage'] = usage_encoder.fit_transform(X_train['Usage'])
y_train = mouse_encoder.fit_transform(y_train)

# Encode testing data (transform only)
X_test['Gender'] = gender_encoder.transform(X_test['Gender'])
X_test['Usage'] = usage_encoder.transform(X_test['Usage'])
y_test = mouse_encoder.transform(y_test)

# Step 3: Define and Evaluate Models
models = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "K-Nearest Neighbors": KNeighborsClassifier(n_neighbors=3)
}

model_accuracies = {}

for model_name, model in models.items():
    # Train the model
    model.fit(X_train, y_train)
    # Predict on the test set
    y_pred = model.predict(X_test)
    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    model_accuracies[model_name] = accuracy

# Display model accuracies
print("\nModel Accuracies on Test Set:")
for model_name, accuracy in model_accuracies.items():
    print(f"{model_name}: {accuracy:.4f}")

# Step 4: Identify the Best Model
best_model_name = max(model_accuracies, key=model_accuracies.get)
best_model = models[best_model_name]
best_model_accuracy = model_accuracies[best_model_name]
print(f"\nBest Model: {best_model_name} with accuracy: {best_model_accuracy:.4f}")


Model Accuracies on Test Set:
Logistic Regression: 0.7433
Decision Tree: 1.0000
Random Forest: 1.0000
K-Nearest Neighbors: 0.9733

Best Model: Decision Tree with accuracy: 1.0000


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [54]:
# Predict New Data
new_data = {
    'Age': [15],          # Example Age
    'Gender': ['Female'], # Example Gender
    'Usage': ['Gaming']     # Example Usage
}

# Convert new data to DataFrame
new_df = pd.DataFrame(new_data)

# Handle unseen categories for 'Gender' and 'Usage'
def safe_transform(column, encoder):
    # Replace unseen categories with the first class (default behavior)
    column = column.apply(lambda x: x if x in encoder.classes_ else encoder.classes_[0])
    return encoder.transform(column)

# Apply safe_transform for 'Gender' and 'Usage'
new_df['Gender'] = safe_transform(new_df['Gender'], gender_encoder)
new_df['Usage'] = safe_transform(new_df['Usage'], usage_encoder)

# Predict the output (Mouse Type) using the best model
predicted_mouse_encoded = best_model.predict(new_df)

# Decode the predicted label back to the original form
predicted_mouse = mouse_encoder.inverse_transform(predicted_mouse_encoded)

# Display the prediction
print(f"\nPredicted Mouse Type for input {new_data}: {predicted_mouse}")


Predicted Mouse Type for input {'Age': [15], 'Gender': ['Female'], 'Usage': ['Gaming']}: ['Corsair Harpoon RGB']
