In [13]:
import pandas as pd
# Sample records stored column-wise: each key is a column label and each
# list holds that column's values, one entry per person.
data = {
    "Name": ["Alice", "Bob", "Charlie", "David", "Emily"],
    "Age": [30, 25, 42, 18, 28],
    "City": ["New York", "London", "Paris", "Berlin", "Tokyo"],
    "Salary": [80000, 75000, 100000, 50000, 90000],
}

# Build the DataFrame; the dict keys become the column labels.
df = pd.DataFrame.from_dict(data)

# Write the frame out as CSV; index=False keeps the row index out of the file.
df.to_csv(path_or_buf="data.csv", index=False)




**DATA MANIPULATION**

**Filtering data**

In [14]:
# Keep only adults, i.e. rows whose Age is at least 18.
adult_mask = df["Age"].ge(18)
adults_df = df.loc[adult_mask]
print(adults_df)


      Name  Age      City  Salary
0    Alice   30  New York   80000
1      Bob   25    London   75000
2  Charlie   42     Paris  100000
3    David   18    Berlin   50000
4    Emily   28     Tokyo   90000


In [11]:
# Keep only people whose Salary is strictly greater than 80,000
# (a salary of exactly 80,000 does not qualify).
high_salary_mask = df["Salary"].gt(80000)
high_earners_df = df.loc[high_salary_mask]
print(high_earners_df)

      Name  Age   City  Salary
2  Charlie   42  Paris  100000
4    Emily   28  Tokyo   90000


In [12]:
# Keep only people whose City is either New York or London.
specific_cities_df = df[df["City"].isin(["New York", "London"])]
print(specific_cities_df)

    Name  Age      City  Salary
0  Alice   30  New York   80000
1    Bob   25    London   75000


***Handling Missing Values***

In [16]:
# Count the missing (null/NaN) entries in each column.
missing_per_column = df.isna().sum()
print(missing_per_column)

Name      0
Age       0
City      0
Salary    0
dtype: int64


In [17]:
# Build a copy of the frame without any row that contains a missing value.
# dropna() returns a new DataFrame, so df itself is left unchanged.
df_no_missing = df.dropna(axis="index", how="any")
print(df_no_missing)

      Name  Age      City  Salary
0    Alice   30  New York   80000
1      Bob   25    London   75000
2  Charlie   42     Paris  100000
3    David   18    Berlin   50000
4    Emily   28     Tokyo   90000


**Calculating Summary Statistics**

In [20]:
# Summarise the numeric columns: count, mean, std, min, quartiles and max.
summary_stats = df.describe()
print(summary_stats)

             Age         Salary
count   5.000000       5.000000
mean   28.600000   79000.000000
std     8.763561   18841.443681
min    18.000000   50000.000000
25%    25.000000   75000.000000
50%    28.000000   80000.000000
75%    30.000000   90000.000000
max    42.000000  100000.000000


In [21]:
# Report the smallest and largest value in the Salary column.
salary_column = df["Salary"]
min_salary, max_salary = salary_column.min(), salary_column.max()
print("Minimum Salary:", min_salary)
print("Maximum Salary:", max_salary)

Minimum Salary: 50000
Maximum Salary: 100000


In [22]:
# Group the rows by City and take the mean Age within each group.
average_age_by_city = (
    df.groupby(by="City")["Age"]
    .mean()
)
print(average_age_by_city)

City
Berlin      18.0
London      25.0
New York    30.0
Paris       42.0
Tokyo       28.0
Name: Age, dtype: float64
