In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score

# Load the data
df = pd.read_csv('Marketing Campaign.csv')  # Adjust the filename as necessary

# Prepare the features and target
features = ['Campaign_Type', 'Target_Audience', 'Duration', 'Channel_Used', 'Engagement_Score', 'Customer_Segment']
target = 'Conversion_Rate'

# Encode categorical variables.
# Every text column gets its OWN LabelEncoder, stored in `label_encoders`, so the
# string -> integer mapping learned here can be re-applied to new data later.
# (A single shared encoder only remembers the last column it was fitted on.)
# NOTE: LabelEncoder assigns integer codes in sorted class order; that order
# carries no meaning for nominal categories.
label_encoders = {}
for feature in features:
    column = df[feature]
    # Match the classic object dtype as well as pandas' dedicated string dtypes,
    # so text columns are never passed to the model un-encoded.
    if pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column):
        le = LabelEncoder()
        df[feature] = le.fit_transform(column)
        label_encoders[feature] = le

X = df[features]
y = df[target]

# Split the data (fixed seed so the reported metrics are reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the held-out 20%
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared Score: {r2}")

# Identify most important features
# `model.feature_importances_` is ordered like the training columns, i.e. like `features`.
feature_importance = pd.DataFrame({'feature': features, 'importance': model.feature_importances_})
feature_importance = feature_importance.sort_values('importance', ascending=False)
print("\nMost important features:")
print(feature_importance)

# Predict future performance
# Assuming 'future_campaigns' is a DataFrame with the same raw (un-encoded) features
# as the CSV, encode it with the encoders fitted above before predicting:
# future_X = future_campaigns[features].copy()
# for feature, encoder in label_encoders.items():
#     future_X[feature] = encoder.transform(future_X[feature])
# future_predictions = model.predict(future_X)

Mean Squared Error: 0.001740283058960598
R-squared Score: 0.7605002115983994

Most important features:
            feature  importance
1   Target_Audience    0.925517
4  Engagement_Score    0.018836
5  Customer_Segment    0.015240
3      Channel_Used    0.014340
0     Campaign_Type    0.013573
2          Duration    0.012494
