In [20]:
import pandas as pd

# 1. Creating DataFrames

In [21]:
# Column-oriented student records: each key becomes one DataFrame column and
# each list holds that column's values, in row order.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'Diana'],
    Age=[23, 25, 22, 24],
    Gender=['Female', 'Male', 'Male', 'Female'],
    Score=[85, 78, 90, 92],
)

# Build the frame directly from the mapping; no index is passed, so rows are
# labelled 0..3.
df = pd.DataFrame(data)

# Bare trailing expression so the notebook renders the frame.
df

Unnamed: 0,Name,Age,Gender,Score
0,Alice,23,Female,85
1,Bob,25,Male,78
2,Charlie,22,Male,90
3,Diana,24,Female,92


In [22]:
# Plain-text preview of the first three rows (positional slice of the frame).
print(df.iloc[:3])

      Name  Age  Gender  Score
0    Alice   23  Female     85
1      Bob   25    Male     78
2  Charlie   22    Male     90


In [23]:
# Print the dtype pandas inferred for every column of `df`.
print(df.dtypes)

Name      object
Age        int64
Gender    object
Score      int64
dtype: object


In [24]:
# Flag each student whose score is strictly greater than 80.
# Assigning by column name adds 'Passed' to `df` (or overwrites it on re-run).
df['Passed'] = df['Score'].gt(80)

# Show the frame with the new boolean column.
df

Unnamed: 0,Name,Age,Gender,Score,Passed
0,Alice,23,Female,85,True
1,Bob,25,Male,78,False
2,Charlie,22,Male,90,True
3,Diana,24,Female,92,True


In [25]:
# Remove the 'Gender' column and rebind `df` to the narrower frame.
# errors='ignore' keeps this cell safe to re-run: once 'Gender' has been
# dropped, executing the cell again is a no-op instead of raising KeyError.
df = df.drop(columns='Gender', errors='ignore')

# Show the frame without the 'Gender' column.
df

Unnamed: 0,Name,Age,Score,Passed
0,Alice,23,85,True
1,Bob,25,78,False
2,Charlie,22,90,True
3,Diana,24,92,True


# 2. Indexing and Slicing

In [26]:
# Keep every row but only the 'Name' and 'Score' columns, in that order.
name_score = df.loc[:, ['Name', 'Score']]

# Show the two-column selection.
name_score

Unnamed: 0,Name,Score
0,Alice,85
1,Bob,78
2,Charlie,90
3,Diana,92


In [27]:
# Boolean-mask filter: keep only the rows whose score is strictly above 85.
higher_score = df[df['Score'].gt(85)]

# Show the filtered rows; their original index labels are preserved.
higher_score

Unnamed: 0,Name,Age,Score,Passed
2,Charlie,22,90,True
3,Diana,24,92,True


In [28]:
# Select the row(s) whose 'Name' is exactly 'Bob'. The result is still a
# DataFrame (not a Series) because the selection is done with a boolean mask.
bob_row = df[df['Name'] == 'Bob']

# Show the matching row.
bob_row


Unnamed: 0,Name,Age,Score,Passed
1,Bob,25,78,False


In [29]:
# Positional slice of the final two rows of the frame.
last_two = df.iloc[-2:]

# Show the last two rows.
last_two

Unnamed: 0,Name,Age,Score,Passed
2,Charlie,22,90,True
3,Diana,24,92,True


# 3. Data Analysis

In [30]:
# Arithmetic mean of the 'Age' column.
avg_age = df['Age'].mean()
print(avg_age)

23.5


In [31]:
# Highest and lowest values in the 'Score' column.
max_score = df['Score'].max()
min_score = df['Score'].min()

# print() separates its arguments with a space; the '\n' in the third
# argument moves the minimum onto its own line.
print('max: ', max_score, '\nmin: ', min_score)

max:  92 
min:  78


In [32]:
# Summing a boolean column counts its True entries, so this is the number of
# students flagged as passed.
passed_students = df['Passed'].sum()
print(passed_students)

3


In [33]:
# Split the rows by their 'Passed' flag and average 'Score' within each group.
# The result is a Series indexed by the flag values.
avg_score_per_group = df.groupby('Passed')['Score'].mean()
print(avg_score_per_group)

Passed
False    78.0
True     89.0
Name: Score, dtype: float64


In [34]:
# Order the rows from highest to lowest score. sort_values returns a new
# frame, so `df` itself keeps its original row order.
sorted_df = df.sort_values('Score', ascending=False)

# Show the ranked frame; index labels travel with their rows.
sorted_df

Unnamed: 0,Name,Age,Score,Passed
3,Diana,24,92,True
2,Charlie,22,90,True
0,Alice,23,85,True
1,Bob,25,78,False


# 4. Importing and Exporting Data

In [35]:
# Write the frame to 'students.csv' in the working directory.
# index=False leaves the row labels out of the file.
df.to_csv(path_or_buf='students.csv', index=False)

In [36]:
# Load 'students.csv' back into a new frame to check the round trip.
new_df = pd.read_csv(filepath_or_buffer='students.csv')

# Show the re-imported data.
new_df

Unnamed: 0,Name,Age,Score,Passed
0,Alice,23,85,True
1,Bob,25,78,False
2,Charlie,22,90,True
3,Diana,24,92,True


# 5. Real-World Problem

In [37]:
# Column-oriented sales records: one entry per product, with its category,
# sales figure and discount.
product_sales = dict(
    Product=['Laptop', 'Smartphone', 'Desk', 'Chair'],
    Category=['Electronics', 'Electronics', 'Furniture', 'Furniture'],
    Sales=[500, 300, 200, 150],
    Discount=[50, 30, 20, 15],
)

# Build the frame directly from the mapping; no index is passed, so rows are
# labelled 0..3.
Sales_data = pd.DataFrame(product_sales)

# Bare trailing expression so the notebook renders the frame.
Sales_data

Unnamed: 0,Product,Category,Sales,Discount
0,Laptop,Electronics,500,50
1,Smartphone,Electronics,300,30
2,Desk,Furniture,200,20
3,Chair,Furniture,150,15


In [38]:
# Net sales per product: the sales figure minus its discount, computed
# element-wise and stored as a new 'Net Sales' column.
Sales_data['Net Sales'] = Sales_data['Sales'].sub(Sales_data['Discount'])

# Show the frame with the derived column.
Sales_data

Unnamed: 0,Product,Category,Sales,Discount,Net Sales
0,Laptop,Electronics,500,50,450
1,Smartphone,Electronics,300,30,270
2,Desk,Furniture,200,20,180
3,Chair,Furniture,150,15,135


In [39]:
# Sum the 'Sales' column within each product category.
# The result is a Series indexed by category name.
Total_sales = Sales_data.groupby(by='Category')['Sales'].sum()
print(Total_sales)

Category
Electronics    800
Furniture      350
Name: Sales, dtype: int64


In [40]:
# idxmax() returns the row label of the largest 'Net Sales' value; .loc then
# reads the 'Product' cell of that row.
highest_net_sale = Sales_data.loc[
    Sales_data['Net Sales'].idxmax(),
    'Product',
]
print("Product with highest net sales: ", highest_net_sale)

Product with highest net sales:  Laptop
