In [None]:
# Import pandas and NumPy. Each print runs only after the import directly
# above it has succeeded, so the messages confirm the library is available.
import pandas as pd
print("Pandas imported")

import numpy as np
print("NumPy imported")


In [None]:
# Task 1: Working with Series
# 1. Create a Pandas Series from the following list: data = [25, 30, 35, 40, 45]
# 2. Write a Python program to:
#    - Assign custom indices ['A', 'B', 'C', 'D', 'E'] to the Series.
#    - Display the first three elements of the Series.
#    - Calculate the mean, median, and standard deviation of the Series.

import pandas as pd

values = [25, 30, 35, 40, 45]

# Attach the custom labels at construction time instead of re-assigning
# the index afterwards.
series_obj = pd.Series(values, index=['A', 'B', 'C', 'D', 'E'])

print("First three elements of the Series:")
print(series_obj.head(3))

# Descriptive statistics; .std() is the sample standard deviation (ddof=1).
avg, med, std_dev = series_obj.mean(), series_obj.median(), series_obj.std()

print()
for label, stat in (
    ("Mean", avg),
    ("Median", med),
    ("Standard Deviation", std_dev),
):
    print(f"{label}: {stat}")


In [None]:
# Task 2: Creating and Inspecting DataFrames
# 1. Create a Pandas DataFrame with the following data:
#      Name   Age  Gender  Marks
#      Alice  20   Female  85
#      Bob    22   Male    78
#      Carol  19   Female  92
#      David  21   Male    74
#      Eve    20   Female  88
# 2. Write a Python program to:
#    - Display the first two rows of the DataFrame.
#    - Display the column names, data types, and summary statistics.
#    - Add a new column Passed that contains True if Marks >= 80 and False otherwise.

# Keep the records row-wise so they mirror the table above, then pivot them
# into the column -> values mapping that the DataFrame is built from.
column_names = ['Name', 'Age', 'Gender', 'Marks']
student_rows = [
    ('Alice', 20, 'Female', 85),
    ('Bob', 22, 'Male', 78),
    ('Carol', 19, 'Female', 92),
    ('David', 21, 'Male', 74),
    ('Eve', 20, 'Female', 88),
]
student_data = {
    column: list(column_values)
    for column, column_values in zip(column_names, zip(*student_rows))
}

student_df = pd.DataFrame(student_data)

print("First two rows of the DataFrame:")
print(student_df.head(2))

# Structural overview: column labels, dtypes, and descriptive statistics
# (include='all' also summarises the non-numeric columns).
for heading, report in (
    ("Column Names:", student_df.columns.tolist()),
    ("Data Types:", student_df.dtypes),
    ("Summary Statistics:", student_df.describe(include='all')),
):
    print(f"\n{heading}")
    print(report)

# A student passes with 80 marks or more.
student_df['Passed'] = student_df['Marks'].ge(80)

print("\n New Column 'Passed' added:")
print(student_df)


In [None]:
# Task 3: Data Selection and Filtering
# 1. Using the DataFrame from Task 2, write a Python program to:
#    - Select and display the Name and Marks columns.
#    - Filter and display records where Marks > 80.
#    - Display the record of the student with the highest marks.

# Column projection.
name_and_marks = student_df.loc[:, ['Name', 'Marks']]
print("Name and Marks columns:")
print(name_and_marks)

# Row filter: strictly more than 80 marks.
above_eighty = student_df.loc[student_df['Marks'].gt(80)]
print("\nStudents with Marks > 80:")
print(above_eighty)

# Match on the maximum value so that every student tied for the top score
# is returned, not just the first one.
top_score = student_df['Marks'].max()
top_student = student_df.loc[student_df['Marks'].eq(top_score)]

print("\nStudent with the highest marks:")
print(top_student)


In [None]:
# Task 4: Handling Missing Data
# 1. Modify the DataFrame from Task 2 by introducing missing values:
#        df.loc[1, 'Marks'] = None
#        df.loc[4, 'Age'] = None
# 2. Write a Python program to:
#    - Identify missing values in the DataFrame.
#    - Fill missing values in the Marks column with the column's mean.
#    - Drop rows where the Age column has missing values.

# Introduce the gaps on a copy rather than on student_df itself. Mutating
# the shared frame in place would silently change the results of every other
# cell that reads student_df (re-running the selection cell, or the grouping
# cell, would then see NaN ages and imputed marks instead of the Task 2 data).
student_df_missing = student_df.copy()
student_df_missing.loc[1, 'Marks'] = None
student_df_missing.loc[4, 'Age'] = None

# Boolean mask: True marks a missing cell.
print("Missing values in the DataFrame:")
print(student_df_missing.isnull())

# Impute Marks with the column mean (mean() skips NaN by default).
# assign() returns a new frame and keeps the column order unchanged.
mean_marks = student_df_missing['Marks'].mean()
student_df_filled = student_df_missing.assign(
    Marks=student_df_missing['Marks'].fillna(mean_marks)
)
print("\nDataFrame after filling missing Marks with mean:")
print(student_df_filled)

# Remove the records whose Age is unknown.
cleaned_df = student_df_filled.dropna(subset=['Age'])
print("\nDataFrame after dropping rows with missing Age:")
print(cleaned_df)


In [None]:
# Task 5: Grouping and Aggregation
# 1. Using the DataFrame from Task 2, write a Python program to:
#    - Group the data by Gender and calculate the mean age and marks for each gender.
#    - Count the number of students in each gender group.

# Per-gender averages of the two numeric columns of interest.
numeric_columns = ['Age', 'Marks']
group_means = student_df.groupby('Gender')[numeric_columns].agg('mean')
print("Mean Age and Marks by Gender:")
print(group_means)

# Group sizes, ordered from the most to the least frequent gender.
gender_column = student_df['Gender']
gender_counts = gender_column.value_counts()
print("\nNumber of students in each gender group:")
print(gender_counts)


In [None]:
# Task 7: General
# 1. Download a sample dataset from Kaggle or use any public dataset.
# 2. Write a Python program to:
#    - Load the dataset using Pandas.
#    - Perform exploratory data analysis (EDA) by summarizing key statistics,
#      checking for missing values, and visualizing data trends using
#      Matplotlib or Seaborn.
#    - Document your findings in Markdown or comments.

import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset (path is relative to the notebook's working directory).
df = pd.read_csv("electric_vehicles_spec_2025.csv")

print("\nSummary statistics:")
print(df.describe())

print("\nMissing values in each column:")
print(df.isnull().sum())

# Keep only the rows where both the model name and its top speed are known.
df_speed = df.loc[:, ['model', 'top_speed_kmh']].dropna()

# Rank by top speed, keep the fastest entry of each model, take the first ten.
top_models = (
    df_speed
    .sort_values('top_speed_kmh', ascending=False)
    .drop_duplicates(subset='model')
    .head(10)
)

# Bar chart of the ten fastest models, drawn on an explicit Axes object.
fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(top_models['model'], top_models['top_speed_kmh'], color='skyblue')
ax.set_title("Top 10 Fastest Electric Vehicle Models")
ax.set_xlabel("Model")
ax.set_ylabel("Top Speed (km/h)")
plt.setp(ax.get_xticklabels(), rotation=45)
ax.grid(axis='y', linestyle='--')
fig.tight_layout()
plt.show()

# Findings
# - GranTurismo Folgore leads with a top speed of over 320 km/h.
# - Tesla holds 3 strong positions: Model S Plaid, Model X Plaid, and Model 3 Performance.
# - All vehicles in the top 10 are luxury or performance-focused, indicating a clear trend in high-speed EV development.