In [4]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import numpy as np

In [5]:
dataset = np.array([["Salty","Hot","Soft","No"],
                   ["Spicy","Hot","Soft","No"],
                   ["Spciyy","Hot","Hard","Yes"],
                   ["Spicy","Cold","Hard","No"],
                   ["Spicy","Hot","Hard","Yes"],
                   ["Sweet","Cold","Soft","Yes"],
                   ["Salty","Cold","Soft","No"],
                   ["Sweet","Hot","Soft","Yes"],
                   ["Spicy","Cold","Soft","Yes"],
                   ["Salty","Hot","Hard","Yes"]])

In [14]:
# split features and target
features = dataset[:,:-1] #taste,temperature, texture
targets = dataset[:,-1] # eat

In [15]:
enc = preprocessing.OrdinalEncoder()
encoded_features = enc.fit_transform(features)

In [16]:
encoded_features

array([[0., 1., 1.],
       [2., 1., 1.],
       [1., 1., 0.],
       [2., 0., 0.],
       [2., 1., 0.],
       [3., 0., 1.],
       [0., 0., 1.],
       [3., 1., 1.],
       [2., 0., 1.],
       [0., 1., 0.]])

In [18]:
# split into test and train
feature_train,feature_test,target_train,target_test = train_test_split(encoded_features,targets,test_size=0.10)

In [24]:
# inputs
guess = np.array([["Salty","Hot","Hard"]])

In [21]:
# Transform the guess using the same encoder (fit on all features)
encoded_guess = enc.transform(guess)

In [22]:
encoded_guess

array([[0., 1., 0.]])

In [23]:
# Train the decision tree classifier
model = DecisionTreeClassifier()
fitted = model.fit(feature_train, target_train)

In [30]:
# Make predictions on the encoded guess
predictions = fitted.predict(encoded_guess)
predictions[0]

'Yes'

In [50]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

Initial Data Preview:
   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket     Fare Cabin Embarked  
0      0         A/5 21171   7.2500   NaN        S  
1      0          PC 17599  71.2833   C85        C  
2      0  STON/O2. 3101282   7.9250   NaN        S  
3      0            113803  53.1000  C123        S  
4      0            373450   8.05

In [51]:
# Load the dataset
df = pd.read_csv("titanic.csv") # Path to the uploaded Titanic dataset

In [52]:
# Display the first few rows to understand the data
print("Initial Data Preview:")
print(df.head())

Initial Data Preview:
   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket     Fare Cabin Embarked  
0      0         A/5 21171   7.2500   NaN        S  
1      0          PC 17599  71.2833   C85        C  
2      0  STON/O2. 3101282   7.9250   NaN        S  
3      0            113803  53.1000  C123        S  
4      0            373450   8.05

In [53]:
# Data Preprocessing
# Dropping irrelevant columns (e.g., 'Name', 'Ticket', 'Cabin' might not be useful for prediction)
df = df.drop(columns=['Name', 'Ticket', 'Cabin'], errors='ignore')

In [54]:
# Handling missing values - Fill missing 'Age' with median, 'Embarked' with mode
df['Age'].fillna(df['Age'].median(), inplace=True)
df['Embarked'].fillna(df['Embarked'].mode()[0], inplace=True)

In [55]:
# Converting categorical columns to numerical (e.g., 'Sex', 'Embarked')
df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})
df['Embarked'] = df['Embarked'].map({'S': 0, 'C': 1, 'Q': 2})

In [56]:
# Dropping any remaining rows with missing values
df.dropna(inplace=True)

In [57]:
# Check if the dataset is empty
print(f"Number of samples after preprocessing: {df.shape[0]}")
if df.shape[0] == 0:
    raise ValueError("The dataset is empty after preprocessing. Check the preprocessing steps.")

Number of samples after preprocessing: 891


In [58]:
# Defining features (X) and target (y)
X = df.drop(columns=['Survived'])  # Features
y = df['Survived']  # Target

In [59]:
# Check if there are valid samples in the feature matrix
print(f"Shape of X (features): {X.shape}")
print(f"Shape of y (target): {y.shape}")

Shape of X (features): (891, 8)
Shape of y (target): (891,)


In [60]:
# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [61]:
# Initializing the Decision Tree Classifier with 'max_depth' parameter to prevent overfitting (tightening)
clf = DecisionTreeClassifier(max_depth=4, random_state=42)

In [62]:
# Training the model
clf.fit(X_train, y_train)

DecisionTreeClassifier(max_depth=4, random_state=42)

In [63]:
# Making predictions
y_pred = clf.predict(X_test)

In [64]:
# Evaluating the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

In [65]:
# Displaying results
print(f"Accuracy: {accuracy:.4f}")
print("Confusion Matrix:")
print(conf_matrix)
print("Classification Report:")
print(class_report)

Accuracy: 0.8209
Confusion Matrix:
[[138  19]
 [ 29  82]]
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.88      0.85       157
           1       0.81      0.74      0.77       111

    accuracy                           0.82       268
   macro avg       0.82      0.81      0.81       268
weighted avg       0.82      0.82      0.82       268

