# Explanation:
Static Data:

The data dictionary contains features age, salary, experience, city, and a target variable purchased for classification.
Initialization:

The ClassificationModels class initializes with the DataFrame created from the static data and sets up necessary preprocessing tools and classification models (Logistic Regression, Decision Tree, Random Forest).
Data Preprocessing:

The preprocess_data method encodes categorical variables (city), scales numerical features, and splits the data into training and test sets.
Model Training:

The train_models method trains each of the specified classification models on the training data.
Model Evaluation:

The evaluate_models method evaluates each trained model using accuracy, confusion matrix, and classification report.
Cross-Validation:

The cross_validation method performs 5-fold cross-validation on each model, printing the average accuracy.
Prediction:

The predict method makes predictions on new data using the trained models. The new data is first encoded and scaled similarly to the training data.

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Define static data
data = {
    'age': [25, 32, 45, 28, 34, 50, 29, 40, 35, 60],
    'salary': [50000, 60000, 80000, 45000, 70000, 90000, 48000, 75000, 68000, 100000],
    'experience': [2, 7, 10, 4, 6, 15, 3, 8, 5, 20],
    'city': ['New York', 'Los Angeles', 'New York', 'Chicago', 'Los Angeles', 'New York', 'Chicago', 'New York', 'Los Angeles', 'Chicago'],
    'purchased': [0, 1, 0, 0, 1, 1, 0, 1, 1, 0]  # Binary target variable for classification
}

# Convert static data to DataFrame
df = pd.DataFrame(data)

# Define the classification models class
class ClassificationModels:
    def __init__(self, dataframe):
        self.df = dataframe
        self.label_encoder = LabelEncoder()
        self.scaler = StandardScaler()
        self.X = self.df.drop('purchased', axis=1)
        self.y = self.df['purchased']
        self.models = {
            'Logistic Regression': LogisticRegression(),
            'Decision Tree': DecisionTreeClassifier(),
            'Random Forest': RandomForestClassifier()
        }
    
    def preprocess_data(self):
        # Encode categorical variables
        self.df['city_encoded'] = self.label_encoder.fit_transform(self.df['city'])
        self.X = self.df[['age', 'salary', 'experience', 'city_encoded']]
        
        # Scale the features
        self.X_scaled = self.scaler.fit_transform(self.X)
        
        # Split data into training and test sets
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(self.X_scaled, self.y, test_size=0.2, random_state=42)
    
    def train_models(self):
        self.trained_models = {}
        for name, model in self.models.items():
            model.fit(self.X_train, self.y_train)
            self.trained_models[name] = model
    
    def evaluate_models(self):
        for name, model in self.trained_models.items():
            y_pred = model.predict(self.X_test)
            accuracy = accuracy_score(self.y_test, y_pred)
            conf_matrix = confusion_matrix(self.y_test, y_pred)
            class_report = classification_report(self.y_test, y_pred)
            print(f"{name}:")
            print(f"Accuracy: {accuracy}")
            print(f"Confusion Matrix:\n{conf_matrix}")
            print(f"Classification Report:\n{class_report}")
            print("\n")
    
    def cross_validation(self):
        for name, model in self.models.items():
            cv_scores = cross_val_score(model, self.X_scaled, self.y, cv=5, scoring='accuracy')
            print(f"{name} Cross-Validation Accuracy: {np.mean(cv_scores)}")
            print("\n")
    
    def predict(self, new_data):
        # Scale the new data
        new_data_scaled = self.scaler.transform(new_data)
        predictions = {}
        for name, model in self.trained_models.items():
            predictions[name] = model.predict(new_data_scaled)
        return predictions

# Initialize the classification models class
clf_models = ClassificationModels(df)

# Preprocess data
clf_models.preprocess_data()

# Train models
clf_models.train_models()

# Evaluate models
clf_models.evaluate_models()

# Cross-validation
print("Cross-Validation Scores:")
clf_models.cross_validation()

# Predict with new data
new_data = pd.DataFrame({
    'age': [27, 55],
    'salary': [52000, 85000],
    'experience': [4, 12],
    'city': ['Chicago', 'New York']
})

# Encode and scale new data
new_data['city_encoded'] = clf_models.label_encoder.transform(new_data['city'])
new_data_scaled = clf_models.scaler.transform(new_data[['age', 'salary', 'experience', 'city_encoded']])

# Predictions
predictions = clf_models.predict(new_data_scaled)
print("Predictions:")
print(predictions)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Logistic Regression:
Accuracy: 0.0
Confusion Matrix:
[[0 0]
 [2 0]]
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       0.0
           1       0.00      0.00      0.00       2.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0



Decision Tree:
Accuracy: 0.5
Confusion Matrix:
[[0 0]
 [1 1]]
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.50      0.67         2

    accuracy                           0.50         2
   macro avg       0.50      0.25      0.33         2
weighted avg       1.00      0.50      0.67         2



Random Forest:
Accuracy: 1.0
Confusion Matrix:
[[2]]
Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00  

