In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [None]:
# Mount Google Drive at /content/drive so the dataset path used below resolves.
# Requires the Google Colab runtime (`google.colab` is imported here).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Location of the ticket-sales CSV inside the mounted Drive folder.
csv_path = "/content/drive/MyDrive/CSE 475 ML/Lab - 04/LAB TASKS/cinema_hall_ticket_sales.csv"

# Read the CSV into the working frame used by the rest of the notebook.
df = pd.read_csv(csv_path)

# Trailing expression: render the frame as this cell's output.
df

In [None]:
# Preview the first 15 rows of the frame.
df.head(15)

In [None]:
# Preview the last rows of the frame (no row count is passed, so the pandas default applies).
df.tail()

In [None]:
# Count missing values per column.
df.isnull().sum()

In [None]:
# Print the column names, non-null counts and dtypes.
df.info()

In [None]:
# List the distinct values of the Ticket_ID column.
df['Ticket_ID'].unique()

In [None]:
# List the distinct values of the Movie_Genre column.
df['Movie_Genre'].unique()

In [None]:
# List the distinct values of the Seat_Type column.
df['Seat_Type'].unique()

In [None]:
# List the distinct values of the Number_of_Person column.
df['Number_of_Person'].unique()

In [None]:
# List the distinct values of the Purchase_Again column.
df['Purchase_Again'].unique()

In [None]:
# Encode the categorical columns as integer codes, one mapping per column.
# NOTE(review): the Number_of_Person keys are strings, which assumes the CSV
# column was read as text (it contains "Alone") — confirm against df.info().
# NOTE(review): "Alone" is coded 6, after "7" -> 5, so these codes are not
# ordered by party size.
mappings = {
    "Movie_Genre": {"Sci-Fi": 4, "Comedy": 3, "Drama": 2, "Horror": 1, "Action": 0},
    "Seat_Type": {"Standard": 0, "VIP": 1, "Premium": 2},
    "Number_of_Person": {"2": 0, "3": 1, "4": 2, "5": 3, "6": 4, "7": 5, "Alone": 6},
    "Purchase_Again": {"Yes": 1, "No": 0},
}

# Rebind `df` to the frame returned by replace() instead of mutating in place.
# infer_objects() then converts the replaced object-dtype columns to a numeric
# dtype explicitly, rather than relying on replace()'s implicit downcasting,
# which pandas has deprecated.
df = df.replace(mappings).infer_objects()

In [None]:
# Display the frame again; this cell follows the encoding step, so it shows the mapped values.
df

In [None]:
# Remove the Ticket_ID column from the working frame.
# errors='ignore' keeps this cell re-runnable: a second run finds the column
# already gone and becomes a no-op instead of raising KeyError.
df = df.drop(columns=['Ticket_ID'], errors='ignore')

In [None]:
# Pairwise correlation between the columns of df.
# NOTE(review): with pandas >= 2.0 this raises if any non-numeric column
# remains in df — confirm every column has been encoded before this cell runs.
correlation = df.corr()
correlation

In [None]:
# Draw the correlation matrix as an annotated heatmap.
plt.figure(figsize=(12, 8), dpi=77)
heatmap_ax = sns.heatmap(
    correlation,
    annot=True,
    linewidths=0.1,
    linecolor='white',
)
heatmap_ax.set_title('Correlation Matrix'.upper(), size=19, pad=13)

# Tilt the column labels on the x-axis.
plt.setp(heatmap_ax.get_xticklabels(), rotation=33)
plt.show()

In [None]:

# Number of rows for each distinct Age value.
Age_counts = df['Age'].value_counts()

# One wedge per distinct age, labelled with the age and its percentage share.
plt.figure(figsize=(15, 15))
plt.pie(
    Age_counts,
    labels=Age_counts.index,
    autopct='%1.1f%%',
    startangle=140,
)
plt.title('Age Distribution')
plt.show()

In [None]:
# Bucket ages into coarse groups and plot the share of each group.
# right=False makes the bins half-open: [0, 20), [20, 40), [40, 60).
# NOTE(review): an age of 60 or more falls outside every bin and becomes NaN,
# which value_counts() then leaves out — confirm the data has no such ages.
bins = [0, 20, 40, 60]
labels = ['0-20', '20-40', '40-60']

# Keep the grouping in a standalone Series rather than adding a column to df.
# A string-labelled categorical column in df would be carried into the feature
# matrix built later with df.drop("Purchase_Again", ...) and break model fitting.
age_group = pd.cut(df['Age'], bins=bins, labels=labels, right=False)

# Number of rows in each age group.
Age_counts = age_group.value_counts()

# One wedge per age group, labelled with the group and its percentage share.
plt.figure(figsize=(10, 10))
plt.pie(Age_counts, labels=Age_counts.index, autopct='%1.1f%%', startangle=140)
plt.title('Age Distribution by Group')
plt.show()


Models 1) Random Forest

In [None]:
# Target: whether the customer purchases again.
y = df["Purchase_Again"]

# Features: every remaining column except the plotting-only "Age_group"
# helper. That column holds string labels ('0-20', ...) which the estimators
# cannot convert to numbers. errors="ignore" makes the drop a no-op when the
# column is not present.
X = df.drop(columns=["Purchase_Again", "Age_group"], errors="ignore")

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Fit a 100-tree random forest on the training split (seeded for reproducibility).
forest_params = {"n_estimators": 100, "random_state": 42}
rf = RandomForestClassifier(**forest_params)
rf.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out rows with the random forest and display them.
y_pred = rf.predict(X_test)
y_pred

In [None]:
# Recompute the forest's predictions so this cell does not depend on whatever
# `y_pred` currently holds, then report accuracy to four decimals.
y_pred = rf.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.4f}")

In [None]:
# Confusion matrix of the current predictions against the held-out labels.
cm = confusion_matrix(y_test, y_pred)

# Tick labels: the distinct target values found in y.
class_labels = np.unique(y)

plt.figure(figsize=(8, 6))
cm_ax = sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_labels,
    yticklabels=class_labels,
)
cm_ax.set_xlabel("Predicted")
cm_ax.set_ylabel("True")
cm_ax.set_title("Confusion Matrix")
plt.show()

2) SVM

# Linear

In [None]:
from sklearn.svm import SVC

# Fit a linear-kernel SVM on the training split.
linear_settings = {"kernel": 'linear', "random_state": 42}
model = SVC(**linear_settings)
model.fit(X_train, y_train)


In [None]:
# Predict labels for the held-out rows with the SVM currently bound to `model`.
y_pred = model.predict(X_test)

# Print the raw predicted labels.
print(y_pred)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Score the current `y_pred` against the held-out labels with four metrics.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)  # recall is also called sensitivity
f1 = f1_score(y_test, y_pred)

# Report the scores in a fixed order, one per line.
for metric_label, metric_value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall (Sensitivity):", recall),
    ("F1-Score:", f1),
):
    print(metric_label, metric_value)

In [None]:
# Confusion matrix for the current predictions.
import matplotlib.pyplot as plt
from sklearn import metrics

# Store the matrix under `cm`, not `confusion_matrix`: the latter name is the
# function imported from sklearn.metrics at the top of the notebook, and
# rebinding it to an array would break any cell that calls it afterwards.
cm = metrics.confusion_matrix(y_test, y_pred)
cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Negative', 'Positive'])
cm_display.plot()
plt.show()

Second Approach using rbf kernel only

In [None]:
from sklearn.svm import SVC

In [None]:
# Fit an SVM with the Radial Basis Function (RBF) kernel, which can capture
# non-linear decision boundaries.
model = SVC(random_state=42, kernel='rbf')

# Train on the training split.
model.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out rows with the SVM currently bound to `model`.
y_pred = model.predict(X_test)

# Print the raw predicted labels.
print(y_pred)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Score the current `y_pred` against the held-out labels with four metrics.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)  # recall is also called sensitivity
f1 = f1_score(y_test, y_pred)

# Report the scores in a fixed order, one per line.
for metric_label, metric_value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall (Sensitivity):", recall),
    ("F1-Score:", f1),
):
    print(metric_label, metric_value)

In [None]:
# Confusion matrix for the current predictions.
import matplotlib.pyplot as plt
from sklearn import metrics

# Store the matrix under `cm`, not `confusion_matrix`: the latter name is the
# function imported from sklearn.metrics at the top of the notebook, and
# rebinding it to an array would break any cell that calls it afterwards.
cm = metrics.confusion_matrix(y_test, y_pred)
cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Negative', 'Positive'])
cm_display.plot()
plt.show()

Third approach: RBF kernel with an explicit regularization parameter (C = 1.0)

In [None]:
from sklearn.svm import SVC

In [None]:
# C is the regularization parameter: it trades a smooth decision boundary
# against classifying every training point correctly.
#   - small C tolerates more misclassification (soft margin)
#   - large C can lead to overfitting (hard margin)
# NOTE(review): C=1.0 is scikit-learn's documented default for SVC, so this
# configuration matches SVC(kernel='rbf', random_state=42) — try other values
# of C to see an effect.
rbf_settings = {"kernel": 'rbf', "C": 1.0, "random_state": 42}
model = SVC(**rbf_settings)

# Train on the training split.
model.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out rows with the SVM currently bound to `model`.
y_pred = model.predict(X_test)

# Print the raw predicted labels.
print(y_pred)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Score the current `y_pred` against the held-out labels with four metrics.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)  # recall is also called sensitivity
f1 = f1_score(y_test, y_pred)

# Report the scores in a fixed order, one per line.
for metric_label, metric_value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall (Sensitivity):", recall),
    ("F1-Score:", f1),
):
    print(metric_label, metric_value)

In [None]:
# Confusion matrix for the current predictions.
import matplotlib.pyplot as plt
from sklearn import metrics

# Store the matrix under `cm`, not `confusion_matrix`: the latter name is the
# function imported from sklearn.metrics at the top of the notebook, and
# rebinding it to an array would break any cell that calls it afterwards.
cm = metrics.confusion_matrix(y_test, y_pred)
cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Negative', 'Positive'])
cm_display.plot()
plt.show()

4th approach: polynomial kernel (degree 5)

In [None]:
from sklearn.svm import SVC

# Fit an SVM with a degree-5 polynomial kernel on the training split.
poly_settings = {"kernel": 'poly', "degree": 5, "random_state": 42}
model_poly = SVC(**poly_settings)
model_poly.fit(X_train, y_train)

In [None]:
# Predict on the test set with the polynomial-kernel model fitted in the
# previous cell. Using `model` here would score the earlier RBF SVM again
# instead of `model_poly`.
y_pred = model_poly.predict(X_test)

# Print the raw predicted labels.
print(y_pred)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Score the current `y_pred` against the held-out labels with four metrics.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)  # recall is also called sensitivity
f1 = f1_score(y_test, y_pred)

# Report the scores in a fixed order, one per line.
for metric_label, metric_value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall (Sensitivity):", recall),
    ("F1-Score:", f1),
):
    print(metric_label, metric_value)

5th approach: sigmoid kernel

In [None]:
from sklearn.svm import SVC

# Fit an SVM with the sigmoid kernel on the training split.
sigmoid_settings = {"kernel": 'sigmoid', "random_state": 42}
model_sigmoid = SVC(**sigmoid_settings)
model_sigmoid.fit(X_train, y_train)

In [None]:
# Predict on the test set with the sigmoid-kernel model fitted in the previous
# cell. Using `model` here would score the earlier RBF SVM again instead of
# `model_sigmoid`.
y_pred = model_sigmoid.predict(X_test)

# Print the raw predicted labels.
print(y_pred)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Score the current `y_pred` against the held-out labels with four metrics.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)  # recall is also called sensitivity
f1 = f1_score(y_test, y_pred)

# Report the scores in a fixed order, one per line.
for metric_label, metric_value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall (Sensitivity):", recall),
    ("F1-Score:", f1),
):
    print(metric_label, metric_value)

6th approach: precomputed RBF kernel with gamma = 0.5

In [None]:
from sklearn.svm import SVC
from sklearn.metrics.pairwise import rbf_kernel

# Kernel (Gram) matrix of the training rows against themselves, using an RBF
# kernel with gamma fixed at 0.5.
K = rbf_kernel(X_train, gamma=0.5)

# An SVM that takes a ready-made kernel matrix instead of raw features.
model_precomputed = SVC(random_state=42, kernel='precomputed')

# Train on the precomputed training kernel.
model_precomputed.fit(K, y_train)

In [None]:
# Predict on the test set with the precomputed-kernel model fitted in the
# previous cell. Using `model` here would score the earlier RBF SVM again.
# A precomputed-kernel SVC takes a kernel matrix, not raw features, so build
# the test-versus-train RBF kernel with the same gamma used for training.
K_test = rbf_kernel(X_test, X_train, gamma=0.5)
y_pred = model_precomputed.predict(K_test)

# Print the raw predicted labels.
print(y_pred)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Score the current `y_pred` against the held-out labels with four metrics.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)  # recall is also called sensitivity
f1 = f1_score(y_test, y_pred)

# Report the scores in a fixed order, one per line.
for metric_label, metric_value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall (Sensitivity):", recall),
    ("F1-Score:", f1),
):
    print(metric_label, metric_value)

7th approach Grid Search

Decision Tree

In [None]:
import pandas as pd
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt

In [None]:
# Fit a decision tree on the full feature matrix and target.
# random_state is fixed, matching the seed used by the other models in this
# notebook, so the fitted tree is the same on every run.
dtree = DecisionTreeClassifier(random_state=42)
dtree = dtree.fit(X, y)

In [None]:
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree

# Take the feature names from the matrix the tree was fitted on, so the node
# labels always match the real columns and their order. A hand-written list
# can drift from X, and must not contain the target column "Purchase_Again".
features = list(X.columns)

# One display name per class the tree actually learned.
class_names = [f"Class {label}" for label in dtree.classes_]

plt.figure(figsize=(20, 10))

# Draw the fitted tree; proportion=True shows class proportions in each node.
plot_tree(
    dtree,
    feature_names=features,
    class_names=class_names,
    filled=True,
    rounded=True,
    fontsize=12,
    precision=2,
    label="all",
    proportion=True
)

plt.title("Decision Tree for Movie Purchase Prediction", fontsize=16)

plt.show()
