# In [1]:
import matplotlib.pyplot as plt
import pandas as pd
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
)
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier, plot_tree

# Read the dataset; 'Revenue' is the label column and every other column
# is used as a feature.
data = pd.read_csv('ECS171_FINAL_DATASET.csv')
y = data['Revenue']
X = data.drop(columns=['Revenue'])

# Hold out 20% of the rows for testing. Stratifying on the label keeps the
# class proportions the same in the training and test partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Balance the classes with SMOTE. Only the training split is resampled;
# the test split is left exactly as it was drawn.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Show the label counts before and after resampling.
print("Class distribution before SMOTE:", y_train.value_counts(), sep="\n")
print(
    "\nClass distribution after SMOTE:",
    pd.Series(y_train_resampled).value_counts(),
    sep="\n",
)

# Fit a decision tree on the SMOTE-resampled training set
# (fit() returns the estimator itself, so the call can be chained).
classification_model = DecisionTreeClassifier(random_state=42).fit(
    X_train_resampled, y_train_resampled
)

# Predict on the held-out test split and report plain accuracy.
y_pred = classification_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Model accuracy:", accuracy)

# Plot the decision tree
# plt.figure(figsize=(20, 10))
# plot_tree(classification_model, feature_names=X.columns, filled=True, class_names=True)
# plt.show()

# Per-class precision / recall / F1 for the test-set predictions.
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

# Confusion matrix for the same predictions
# (scikit-learn convention: rows = true class, columns = predicted class).
print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

# Cross-Validation for Classification
# The model evaluated above is "SMOTE + decision tree", so the same
# procedure is cross-validated here. Putting SMOTE inside an imblearn
# Pipeline makes every fold oversample only its own training portion and
# score on untouched, un-resampled validation rows. Cross-validating the
# bare tree on the raw data would measure a different (un-resampled) model,
# and resampling before splitting would leak synthetic rows across folds.
cv_pipeline = Pipeline([
    ('smote', SMOTE(random_state=42)),
    ('tree', DecisionTreeClassifier(random_state=42)),
])
# Default scoring for a classifier is accuracy, matching "Model accuracy".
cv_scores_clf = cross_val_score(cv_pipeline, X, y, cv=5)
print(f"\nCross-Validation Scores (Classification): {cv_scores_clf}")
print(f"Mean CV Score (Classification): {cv_scores_clf.mean()}")


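# Cell output: ModuleNotFoundError: No module named 'imblearn'
# (the `imblearn` module is provided by the `imbalanced-learn` package: pip install imbalanced-learn)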