# Task 1: Web Scraping

In [11]:
# Wikipedia article whose passenger table is the scrape target.
URL = 'https://en.wikipedia.org/wiki/Passengers_of_the_RMS_Titanic'
# HTTP request headers. Kept under their own name so that re-running this
# step never overwrites the `headers` list of column names that is built
# from the table further down.
request_headers = {'User-Agent': 'Mozilla/5.0'}
# Bound the wait so a stalled connection cannot hang the run indefinitely.
response = requests.get(URL, headers=request_headers, timeout=30)
# Raise on a 4xx/5xx reply instead of silently parsing an error page.
response.raise_for_status()
# Parse the raw response bytes with the standard-library HTML parser backend.
soup = BeautifulSoup(response.content, 'html.parser')

In [5]:
# Take the first <table> in the parsed page that carries the 'wikitable' class.
table = soup.find('table', {'class': 'wikitable'})

if table is None:
    print("❌ Table not found. Check the URL or class name.")
    # Stop with a non-zero status. `exit()` is a helper injected by the
    # `site` module for interactive use and ends with status 0; raising
    # SystemExit works in scripts and interactive sessions alike and
    # reports the failure to the caller.
    raise SystemExit(1)

In [6]:
# Column names: stripped text of every cell (<th> or <td>) in the table's
# first row.
header_row = table.find('tr')
header_cells = header_row.find_all(['th', 'td'])
headers = [cell.get_text(strip=True) for cell in header_cells]

In [7]:
# Gather every <tr> element found under the table (the header row included).
rows = table.find_all(name='tr')

In [9]:
# One list of stripped cell texts per table row; rows[0] is the header row
# and is skipped.
data = [
    [cell.get_text(strip=True) for cell in row.find_all(['th', 'td'])]
    for row in rows[1:]
]

In [10]:
# Assemble the scraped rows into a DataFrame whose columns are named after
# the header cells.
df = pd.DataFrame(data=data, columns=headers)

# Persist the table as CSV, leaving out the DataFrame index column.
csv_path = 'titanic_passengers_scraped.csv'
df.to_csv(csv_path, index=False)

# Render the DataFrame as an HTML <table> fragment without the index column.
html_table = df.to_html(index=False)

# Full HTML page wrapping the table. Doubled braces ({{ }}) are literal CSS
# braces inside the f-string; {html_table} is the only substitution.
# The <meta charset> declaration matches the UTF-8 encoding the report is
# written with, so browsers decode non-ASCII names correctly.
html_report = f"""
<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Titanic Passengers Report</title>
    <style>
        body {{
            font-family: Arial, sans-serif;
            margin: 40px;
        }}
        h1 {{
            color: #2E86C1;
        }}
        table {{
            border-collapse: collapse;
            width: 100%;
        }}
        th, td {{
            border: 1px solid #ddd;
            padding: 8px;
        }}
        th {{
            background-color: #2E86C1;
            color: white;
        }}
        tr:nth-child(even) {{background-color: #f2f2f2;}}
    </style>
</head>
<body>
    <h1>Titanic Passengers Report</h1>
    {html_table}
</body>
</html>
"""

# Write the assembled page to disk as UTF-8, then announce completion.
report_path = 'titanic_report.html'
with open(report_path, mode='w', encoding='utf-8') as report_file:
    report_file.write(html_report)

print("✅ Titanic passenger data scraped, CSV and HTML report generated.")


✅ Titanic passenger data scraped, CSV and HTML report generated.
