In [2]:
# Use the %pip magic rather than !pip so the packages are installed into the
# environment of the running kernel. fuzzywuzzy is pinned to the version this
# notebook was last run with.
%pip install pandas scikit-learn fuzzywuzzy==0.18.0

Collecting fuzzywuzzy
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB)
Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: fuzzywuzzy
Successfully installed fuzzywuzzy-0.18.0


In [3]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from fuzzywuzzy import process
import warnings

# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so it also hides deprecation and
# future-behaviour warnings emitted by pandas/scikit-learn; consider narrowing it.
warnings.simplefilter('ignore')

# Read the passenger table from disk
file_path = 'Titanic-Dataset.csv'  # replace with your file path in Google Colab
titanic_data = pd.read_csv(filepath_or_buffer=file_path)

# Preprocess the data
def preprocess_data(data):
    """Return a cleaned, fully numeric copy of the raw passenger table.

    The input frame is not modified. The steps are:

    * missing 'Age' and 'Fare' values are replaced with the column median,
      missing 'Embarked' values with the most frequent port;
    * the 'PassengerId', 'Name', 'Ticket' and 'Cabin' columns are dropped;
    * 'Sex' and 'Embarked' are replaced by integer codes assigned in sorted
      order of the distinct values (e.g. 'female' -> 0, 'male' -> 1).

    Parameters
    ----------
    data : pandas.DataFrame
        Raw table containing at least the columns named above.

    Returns
    -------
    pandas.DataFrame
        A new frame with the remaining columns, imputed and encoded.
    """
    data = data.copy()

    # Fill missing values. The result is assigned back to the column instead of
    # calling fillna(inplace=True) on data[col]: that form is chained
    # assignment, which pandas warns about and which stops updating the frame
    # once Copy-on-Write is the default.
    data['Age'] = data['Age'].fillna(data['Age'].median())
    data['Embarked'] = data['Embarked'].fillna(data['Embarked'].mode()[0])
    data['Fare'] = data['Fare'].fillna(data['Fare'].median())

    # Drop columns that are not needed
    data = data.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])

    # Convert categorical variables to numeric. Categories are sorted, so the
    # codes match what a label encoder fitted on the same column would give.
    # A value still missing at this point would be encoded as -1.
    for column in ('Sex', 'Embarked'):
        data[column] = data[column].astype('category').cat.codes.astype('int64')
    return data

# Run the raw passenger table through the cleaning/encoding step
processed_data = preprocess_data(titanic_data)

# Separate the label column ('Survived') from the predictor columns
y = processed_data['Survived']
X = processed_data.drop(columns='Survived')

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest on the training portion
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Function to display details and prediction
def predict_survival(name, pclass, min_score=0):
    """Look up a passenger by approximate name within a ticket class.

    Despite the function name, no model inference happens here: the value
    returned as the "prediction" is the outcome recorded in the 'Survived'
    column of ``titanic_data``.

    The search is restricted to passengers of the requested class *before*
    fuzzy matching. Matching against the whole table and filtering by class
    afterwards would report "not found" whenever the best overall match
    happened to travel in a different class.

    Parameters
    ----------
    name : str
        Full or partial passenger name; matched fuzzily against 'Name'.
    pclass : int
        Ticket class to search in (compared against the 'Pclass' column).
    min_score : int, optional
        Minimum fuzzy-match score required to accept the best candidate.
        The default of 0 accepts the closest candidate regardless of score.

    Returns
    -------
    tuple
        ``(details, outcome)`` where ``details`` is a dict of the matched row
        (with 'Survived' replaced by a readable label) and ``outcome`` is
        'Survived' or 'Did not survive'. When nothing matches, returns
        ``("Passenger not found.", "")``.
    """
    not_found = ("Passenger not found.", "")

    # A blank query cannot identify anyone; do not return an arbitrary row.
    if not name or not name.strip():
        return not_found

    # Narrow the candidates to the requested class first
    candidates = titanic_data[titanic_data['Pclass'] == pclass]
    if candidates.empty:
        return not_found

    # Find the closest match for the passenger's name; each result starts
    # with (matched name, score)
    name_matches = process.extract(name, candidates['Name'], limit=1)
    if not name_matches or name_matches[0][1] < min_score:
        return not_found
    closest_name = name_matches[0][0]

    # Find the passenger's details (first row carrying the matched name)
    passenger_details = candidates[candidates['Name'] == closest_name].iloc[0]

    # Display details and outcome based on the actual data
    survived = 'Survived' if passenger_details['Survived'] == 1 else 'Did not survive'
    passenger_details_dict = passenger_details.to_dict()
    passenger_details_dict['Survived'] = survived
    return passenger_details_dict, survived

# Function to analyze the data based on user question
def _survival_percentage_by_sex(data, sex):
    """Return the answer sentence for the share of `sex` passengers who survived."""
    of_sex = data[data['Sex'] == sex]
    if of_sex.empty:
        # Avoid dividing by zero when the table has no passengers of this sex
        return f"There are no {sex} passengers in the dataset."
    percent = (of_sex['Survived'] == 1).sum() / len(of_sex) * 100
    return f"The percentage of {sex} survivors is {percent:.2f}%."


def analyze_data(question, data=None):
    """Answer one of a fixed set of questions about the passenger table.

    The question is lower-cased and checked for known phrases (substring
    match). Supported phrases: "how many people survived", "how many people
    did not survive", "average age of survivors", "average age of
    non-survivors", "percentage of female survivors", "percentage of male
    survivors", "survivors by class" and "non-survivors by class".

    Parameters
    ----------
    question : str
        Free-text question from the user.
    data : pandas.DataFrame, optional
        Table to analyse, with 'Survived', 'Age', 'Sex' and 'Pclass' columns.
        Defaults to the module-level ``titanic_data``.

    Returns
    -------
    str
        The answer, or an apology when the question is not recognised.
    """
    if data is None:
        data = titanic_data
    question = question.lower()

    if "how many people survived" in question:
        num_survived = data['Survived'].sum()
        return f"{num_survived} people survived."
    elif "how many people did not survive" in question:
        num_not_survived = (data['Survived'] == 0).sum()
        return f"{num_not_survived} people did not survive."
    elif "average age of survivors" in question:
        avg_age_survived = data[data['Survived'] == 1]['Age'].mean()
        return f"The average age of survivors is {avg_age_survived:.2f}."
    elif "average age of non-survivors" in question:
        avg_age_not_survived = data[data['Survived'] == 0]['Age'].mean()
        return f"The average age of non-survivors is {avg_age_not_survived:.2f}."
    elif "percentage of female survivors" in question:
        return _survival_percentage_by_sex(data, 'female')
    elif "percentage of male survivors" in question:
        return _survival_percentage_by_sex(data, 'male')
    elif "non-survivors by class" in question:
        # Must be tested before "survivors by class": that phrase is a
        # substring of this one and would otherwise always win.
        non_survivors_by_class = data[data['Survived'] == 0]['Pclass'].value_counts(normalize=True) * 100
        return non_survivors_by_class.to_string()
    elif "survivors by class" in question:
        survivors_by_class = data[data['Survived'] == 1]['Pclass'].value_counts(normalize=True) * 100
        return survivors_by_class.to_string()
    else:
        return "I'm sorry, I can only answer specific questions about the Titanic dataset."

# Test the function with user input
name = input("Enter passenger name: ")

# Keep asking until a valid ticket class is entered; a bare int(input(...))
# would abort the cell with ValueError on any non-numeric answer.
while True:
    try:
        pclass = int(input("Enter ticket class (1, 2, or 3): "))
    except ValueError:
        print("Please enter 1, 2, or 3.")
        continue
    if pclass in (1, 2, 3):
        break
    print("Please enter 1, 2, or 3.")

details, prediction = predict_survival(name, pclass)
print(f"Details: {details}")
print(f"Prediction: {prediction}")

# Ask the user if they have any questions
question = input("If you have any doubts or questions, you can ask: ")
answer = analyze_data(question)
print(answer)




Enter passenger name: Rice, Master. Eugene
Enter ticket class (1, 2, or 3): 3
Details: {'PassengerId': 17, 'Survived': 'Did not survive', 'Pclass': 3, 'Name': 'Rice, Master. Eugene', 'Sex': 'male', 'Age': 2.0, 'SibSp': 4, 'Parch': 1, 'Ticket': '382652', 'Fare': 29.125, 'Cabin': nan, 'Embarked': 'Q'}
Prediction: Did not survive
If you have any doubts or questions, you can ask: percentage of male survivors
The percentage of male survivors is 18.89%.
