---
title: "Coding Challenge - DS 250"
subtitle: "Timed Quiz: Q1–Q4"
author: "Maia Faith Chambers"
format:
  html:
    self-contained: true
    toc: true
    toc-depth: 2
    title-block-banner: true
    code-fold: true
    code-summary: "Show Code"
    code-tools:
      toggle: true
      caption: See Code
execute:
  kernel: python312
  warning: false
---

In [None]:
# Environment check: show which Python interpreter the notebook kernel is running on.
import sys

interpreter_path = sys.executable
print(interpreter_path)

In [None]:
# Imports (reuse from project)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Styling: apply the white-grid theme to every plot drawn after this point.
# `sns.set` is only an alias of `sns.set_theme`; seaborn documents `set_theme` as the
# preferred interface and notes that the alias may be removed.
sns.set_theme(style="whitegrid")

# Load dataset (adjust the file path if the provided data file is named differently).
# The plotting cell that follows reads the `year`, `n` and `name` columns from this frame.
names = pd.read_csv("names_year.csv")

In [None]:
# Line chart of `n` against `year`, one coloured line per `name`, with a marker at 2000.
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(data=names, x="year", y="n", hue="name", ax=ax)

ax.set_title("Name Popularity Over Time")
ax.set_xlabel("Year")
ax.set_ylabel("Number of Babies")

# Dashed reference line at the year 2000; its label sits just to the right of the line,
# at 80% of the largest count so it stays inside the plotted range.
label_height = 0.8 * names["n"].max()
ax.axvline(2000, color="gray", linestyle="--")
ax.text(2001, label_height, "Millennium", rotation=90)

fig.tight_layout()
plt.show()

In [None]:
# Replace the missing entries with the series' standard deviation, then report the
# mean of the completed series rounded to two decimals.
raw_values = [np.nan, 18, 22, 45, 31, np.nan, 85, 38, 129, 8000, 22, 2]
problem = pd.Series(raw_values)

# Series.std() skips NaN and uses the sample (ddof=1) formula by default.
std_val = problem.std()

# Keep every observed value; substitute the standard deviation wherever one is missing.
filled = problem.where(problem.notna(), std_val)

result = round(filled.mean(), 2)
print("Final Mean:", result)

In [None]:
# Tally how many times each age-range label occurs and expose the tally as a
# two-column frame ("Age Range", "Count") for plotting.
age_labels = [
    "10-25", "10-25", "26-35", "56-85", "0-9", "46-55",
    "56-85", "0-9", "26-35", "56-85", "0-9", "10-25",
]
ages = pd.Series(age_labels)
age_df = (
    ages.value_counts()
    .rename_axis("Age Range")       # name the label index so it becomes the first column
    .reset_index(name="Count")      # move labels out of the index; counts become "Count"
)

# Bar chart of how often each age range occurs, with the ranges in ascending order.
plt.figure(figsize=(8,5))

# Order the bars by the numeric lower bound of each "low-high" label. A plain string
# sort is only right by coincidence for these labels (it would place "100-120"
# before "26-35", for example).
range_order = sorted(age_df["Age Range"], key=lambda label: int(label.split("-")[0]))

sns.barplot(data=age_df, x="Age Range", y="Count", order=range_order)
plt.title("Age Range Frequency")
plt.xlabel("Age Range")
plt.ylabel("Count")
plt.tight_layout()
plt.show()

In [None]:
# Train a random forest to predict the `female` column of the cleaned Star Wars data,
# print its accuracy on a 20% hold-out split, and chart the ten largest feature importances.

# Load cleaned Star Wars dataset and drop every row that contains a missing value
df = pd.read_csv("star_wars_clean.csv")
df = df.dropna()
y = df["female"]
# All remaining columns are predictors; get_dummies one-hot encodes the non-numeric ones
X = pd.get_dummies(df.drop(columns=["female"]))

# Split and Train (random_state is fixed on both, so reruns give the same split and forest)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=2022)
model = RandomForestClassifier(random_state=2022)
model.fit(X_train, y_train)

# Accuracy on the held-out 20%
acc = accuracy_score(y_test, model.predict(X_test))
print(f"Accuracy: {acc:.2%}")

# Feature Importance Plot: keep the ten largest importances, labelled by encoded column name
importances = pd.Series(model.feature_importances_, index=X.columns)
top10 = importances.nlargest(10).reset_index()
top10.columns = ["Feature", "Importance"]

plt.figure(figsize=(8,5))
# seaborn 0.13 deprecates `palette` without `hue` (removal announced for 0.14), so the
# bar variable is also assigned to `hue`. dodge=False keeps full-width bars on older
# seaborn versions, where a redundant hue would otherwise shrink and offset them.
ax = sns.barplot(data=top10, x="Importance", y="Feature", hue="Feature",
                 palette="viridis", dodge=False)
# Older seaborn adds a legend that merely repeats the y tick labels; remove it if present.
legend = ax.get_legend()
if legend is not None:
    legend.remove()
plt.title("Top 10 Important Features")
plt.tight_layout()
plt.show()

In [None]:
# NOTE(review): this cell repeats the same Star Wars load / train / plot steps as the
# preceding cell. With identical data and random_state it reproduces the same model
# and chart. Confirm whether the repetition is intentional.

# Load cleaned Star Wars dataset; rows with any missing value are discarded
df = pd.read_csv("star_wars_clean.csv")
df = df.dropna()
y = df["female"]  # prediction target
# Predictors are every other column, with non-numeric columns one-hot encoded
X = pd.get_dummies(df.drop(columns=["female"]))

# Split and Train: 80/20 split and forest both seeded with 2022 for reproducibility
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=2022)
model = RandomForestClassifier(random_state=2022)
model.fit(X_train, y_train)

# Accuracy of the fitted model on the test portion
acc = accuracy_score(y_test, model.predict(X_test))
print(f"Accuracy: {acc:.2%}")

# Feature Importance Plot: the ten highest importances as a (Feature, Importance) frame
importances = pd.Series(model.feature_importances_, index=X.columns)
top10 = importances.nlargest(10).reset_index()
top10.columns = ["Feature", "Importance"]

plt.figure(figsize=(8,5))
# Passing `palette` without `hue` is deprecated since seaborn 0.13 and scheduled for
# removal in 0.14. Assigning the bar variable to `hue` keeps the per-bar colours, and
# dodge=False preserves full-width bars on seaborn versions before 0.13.
ax = sns.barplot(data=top10, x="Importance", y="Feature", hue="Feature",
                 palette="viridis", dodge=False)
# On older seaborn the redundant hue creates a legend duplicating the tick labels.
legend = ax.get_legend()
if legend is not None:
    legend.remove()
plt.title("Top 10 Important Features")
plt.tight_layout()
plt.show()