In [13]:
# 1. Import the "Power Tools"
import pandas as pd # Helps us manage data like Excel
from sklearn.model_selection import train_test_split # Helps us split data for testing
from sklearn.ensemble import RandomForestClassifier # The AI brain (a "forest" of many Decision Trees that vote)
from sklearn.metrics import accuracy_score # Grading our homework

# 2. Load the Data (Directly from the web)
# We are grabbing the raw data from a public GitHub repository
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
data = pd.read_csv(url)

# 3. Clean the Data (The most important part of AI!)
# Keep only the answer column ('Survived') and the columns we will feed the model.
# Free-text columns such as names and tickets are left out.
# .copy() gives us an independent table, so the column assignment below
# does not trigger pandas' SettingWithCopyWarning.
data = data[['Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']].copy()

# Convert 'Sex' to numbers (Male = 0, Female = 1) because AI only understands math
data['Sex'] = data['Sex'].map({'male': 0, 'female': 1})

# 4. Split the Data
# X is the data we study (Class, Age, Sex, etc.)
# y is the answer key (Survived or Died)
X = data.drop('Survived', axis=1)
y = data['Survived']

# Split: 80% for studying (train), 20% for the final exam (test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fill in missing Ages with the average age (AI crashes if there are blanks).
# The average is calculated from the training rows ONLY and then reused for the
# test rows, so nothing about the "final exam" leaks into the studying phase.
train_age_mean = X_train['Age'].mean()
X_train = X_train.fillna({'Age': train_age_mean})
X_test = X_test.fillna({'Age': train_age_mean})

# 5. Train the Model
# random_state pins the forest's internal randomness so the score below is
# the same every time this cell is re-run.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# 6. Test the Model
# Compare the model's guesses on the unseen 20% with the real answers.
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)

print(f"🎉 Success! Your AI Model is {accuracy * 100:.2f}% accurate at predicting survival.")

🎉 Success! Your AI Model is 81.01% accurate at predicting survival.
