In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.impute import SimpleImputer  # Import SimpleImputer

In [None]:
# Load the dataset
data = pd.read_csv(r"/content/tour_package.csv")  # Replace 'your_dataset.csv' with your dataset file path
data.dropna(inplace=True)

# **LOGISTIC REGRESSION**

In [None]:
# Encode categorical variables
label_encoders = {}
categorical_columns = ['TypeofContact', 'Occupation', 'Gender', 'ProductPitched', 'MaritalStatus', 'Designation']
for col in categorical_columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

In [None]:
# Define features and target variable
X = data.drop(columns=['CustomerID', 'ProdTaken'])  # Features
y = data['ProdTaken']  # Target variable

In [None]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Use SimpleImputer to impute missing values with the mean for numerical columns
numerical_columns = ['Age', 'DurationOfPitch', 'MonthlyIncome']
imputer = SimpleImputer(strategy='mean')
X_train[numerical_columns] = imputer.fit_transform(X_train[numerical_columns])
X_test[numerical_columns] = imputer.transform(X_test[numerical_columns])

In [None]:
# Initialize and train a logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

In [None]:
# Make predictions on the test set
y_pred = model.predict(X_test)

In [None]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(classification_rep)

Accuracy: 0.8171912832929782
Classification Report:
              precision    recall  f1-score   support

           0       0.82      1.00      0.90       661
           1       0.89      0.10      0.17       165

    accuracy                           0.82       826
   macro avg       0.85      0.55      0.54       826
weighted avg       0.83      0.82      0.75       826



# **Naive Bayes**

In [35]:
from sklearn.naive_bayes import GaussianNB
# Initialize and train a logistic regression model
model_NB = GaussianNB()
model_NB.fit(X_train, y_train)

In [36]:
# Make predictions on the test set
y_pred = model_NB.predict(X_test)

In [37]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(classification_rep)

Accuracy: 0.8365617433414043
Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.95      0.90       661
           1       0.66      0.38      0.48       165

    accuracy                           0.84       826
   macro avg       0.76      0.66      0.69       826
weighted avg       0.82      0.84      0.82       826



# **Decision Tree Classifier**

In [42]:
from sklearn.tree import DecisionTreeClassifier
# Initialize and train a logistic regression model
model_dt = DecisionTreeClassifier()
model_dt.fit(X_train, y_train)

In [43]:
# Make predictions on the test set
y_pred = model_dt.predict(X_test)

In [41]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(classification_rep)

Accuracy: 0.9200968523002422
Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.96      0.95       661
           1       0.82      0.76      0.79       165

    accuracy                           0.92       826
   macro avg       0.88      0.86      0.87       826
weighted avg       0.92      0.92      0.92       826



# **RANDOM FOREST CLASSIFIER**

In [44]:
from sklearn.ensemble import RandomForestClassifier
# Initialize and train a RandomForestClassifier model
model_RF = RandomForestClassifier()
model_RF.fit(X_train, y_train)

In [22]:
# Make predictions on the test set
y_pred = model_RF.predict(X_test)

In [23]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(classification_rep)

Accuracy: 0.950363196125908
Classification Report:
              precision    recall  f1-score   support

           0       0.95      1.00      0.97       661
           1       0.98      0.77      0.86       165

    accuracy                           0.95       826
   macro avg       0.96      0.88      0.92       826
weighted avg       0.95      0.95      0.95       826



# **Prediction using Random Forest Classifier**

In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

# Load your dataset
df = pd.read_csv(r'/content/tour_package.csv')  # Replace 'your_dataset.csv' with the actual path to your dataset
df.dropna(inplace=True)
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy='mean')
df.fillna(df.mean(), inplace=True)
# Select relevant features for prediction
features = ['Age', 'DurationOfPitch', 'MonthlyIncome']

# Initialize and train the RandomForestClassifier model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(df[features], df['ProdTaken'])

# Prepare new data for prediction (similar to smartphone example)
new_data = pd.DataFrame({'Age': [41], 'DurationOfPitch': [6], 'MonthlyIncome': [20993]})
new_data.dropna(inplace=True)
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy='mean')
new_data.fillna(new_data.mean(), inplace=True)
# Predict 'ProdTaken' for the new data
predicted_ProdTaken = model.predict(new_data[features])

# Map predicted values to "Yes" (1) or "No" (0)
predicted_ProdTaken_text = ["Yes" if val == 1 else "No" for val in predicted_ProdTaken]

# Add the predicted values to the new data DataFrame
new_data['Predicted_ProdTaken'] = predicted_ProdTaken_text

# Display the new data with predicted 'ProdTaken'
print(new_data)


  df.fillna(df.mean(), inplace=True)


   Age  DurationOfPitch  MonthlyIncome Predicted_ProdTaken
0   41                6          20993                 Yes


# **Prediction using Logistic Regression**

In [24]:
import pandas as pd
from sklearn.linear_model import LogisticRegression

# Load your dataset
df = pd.read_csv(r'/content/tour_package.csv')  # Replace 'your_dataset.csv' with the actual path to your dataset
df.dropna(inplace=True)
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy='mean')
df.fillna(df.mean(), inplace=True)
# Select relevant features for prediction
features = ['Age', 'DurationOfPitch', 'MonthlyIncome']

# Initialize and train the RandomForestClassifier model
model_lo = LogisticRegression()
model_lo.fit(df[features], df['ProdTaken'])

# Prepare new data for prediction (similar to smartphone example)
new_data = pd.DataFrame({'Age': [19], 'DurationOfPitch': [5], 'MonthlyIncome': [2000]})
new_data.dropna(inplace=True)
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy='mean')
new_data.fillna(new_data.mean(), inplace=True)
# Predict 'ProdTaken' for the new data
predicted_ProdTaken = model_lo.predict(new_data[features])

# Map predicted values to "Yes" (1) or "No" (0)
predicted_ProdTaken_text = ["Yes" if val == 1 else "No" for val in predicted_ProdTaken]

# Add the predicted values to the new data DataFrame
new_data['Predicted_ProdTaken'] = predicted_ProdTaken_text

# Display the new data with predicted 'ProdTaken'
print(new_data)

   Age  DurationOfPitch  MonthlyIncome Predicted_ProdTaken
0   19                5           2000                  No


  df.fillna(df.mean(), inplace=True)


dt