In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Step 1: Build the synthetic dataset

# Candidate years (2010 through 2024) and the number of rows to draw
years = np.arange(2010, 2025)
n_samples = 1000

# Seed the global NumPy RNG, then draw one column at a time. The order of
# the draws (Year, Region, Price, Battery Capacity, Sales) determines the
# generated values, so it must not be rearranged.
np.random.seed(42)
data = {}
data['Year'] = np.random.choice(years, n_samples)
data['Region'] = np.random.choice(['North America', 'Europe', 'Asia'], n_samples)
data['Price'] = np.random.uniform(30000, 80000, n_samples)
data['Battery Capacity'] = np.random.uniform(40, 100, n_samples)
# NOTE: Sales is sampled on its own, independently of every other column.
data['Sales'] = np.random.choice(['High', 'Medium', 'Low'], n_samples)

# One-hot encode both categorical columns in a single pass
df = pd.get_dummies(pd.DataFrame(data), columns=['Region', 'Sales'])

# The three Sales_* indicator columns make up the target; every remaining
# column is a feature.
sales_columns = ['Sales_High', 'Sales_Medium', 'Sales_Low']
X = df.drop(columns=sales_columns)
# Collapse the indicators back into a single label per row: the name of
# the indicator column that is set.
y = df[sales_columns].idxmax(axis=1)

# Step 2: Train a Random Forest classifier

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Build a 100-tree forest and fit it on the training portion in one step
# (fit returns the estimator itself, so clf is the fitted model)
clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Step 3: Evaluate the classifier

# Predict a label for every held-out row
y_pred = clf.predict(X_test)

# Per-class precision/recall/F1 first, then overall accuracy to two decimals
report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
print("Classification Report:")
print(report)
print(f"Accuracy: {accuracy:.2f}")

Classification Report:
              precision    recall  f1-score   support

  Sales_High       0.29      0.22      0.25        99
   Sales_Low       0.29      0.40      0.33        93
Sales_Medium       0.40      0.36      0.38       108

    accuracy                           0.33       300
   macro avg       0.33      0.33      0.32       300
weighted avg       0.33      0.33      0.32       300

Accuracy: 0.33
