In [1]:
import pandas as pd
import numpy as np  # For generating random salary values

# Seed for the salary generator so that Restart & Run All reproduces the same
# salaries (and therefore the same summary statistics) on every run.
# NOTE: any output stored from an earlier, unseeded run will differ until the
# cell is executed again.
SEED = 42

# Raw input: one entry per person, keyed by human-readable column labels.
data = {
    'First Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Age': [25, 30, 35, 40],
    'City': ['New York', 'San Francisco', 'Los Angeles', 'Chicago']
}

# Build the frame and normalise the labels to snake_case in a single chained
# expression (lower-case, spaces -> underscores). `rename` returns a new frame,
# so no in-place mutation is needed and re-running the cell is idempotent.
df = pd.DataFrame(data).rename(
    columns=lambda label: label.lower().replace(' ', '_')
)

# Print first 3 rows
print("First 3 rows:")
print(df.head(3))

# Find mean age
mean_age = df['age'].mean()
print("\nMean Age:", mean_age)

# Select and print only the 'first_name' and 'city' columns
print("\nName and City columns:")
print(df[['first_name', 'city']])

# Add a 'salary' column of pseudo-random integers in [40000, 80000): the upper
# bound is exclusive. A locally seeded Generator is used instead of the global
# NumPy random state so the values do not depend on what ran earlier in the
# kernel.
rng = np.random.default_rng(SEED)
df['salary'] = rng.integers(40000, 80000, size=len(df))

# Summary statistics for every column; include='all' reports count/unique/top/
# freq for the text columns alongside the numeric statistics.
print("\nSummary Statistics:")
print(df.describe(include='all'))


First 3 rows:
  first_name  age           city
0      Alice   25       New York
1        Bob   30  San Francisco
2    Charlie   35    Los Angeles

Mean Age: 32.5

Name and City columns:
  first_name           city
0      Alice       New York
1        Bob  San Francisco
2    Charlie    Los Angeles
3      David        Chicago

Summary Statistics:
       first_name        age      city        salary
count           4   4.000000         4      4.000000
unique          4        NaN         4           NaN
top         Alice        NaN  New York           NaN
freq            1        NaN         1           NaN
mean          NaN  32.500000       NaN  65650.000000
std           NaN   6.454972       NaN  12833.973144
min           NaN  25.000000       NaN  49520.000000
25%           NaN  28.750000       NaN  59547.500000
50%           NaN  32.500000       NaN  66545.000000
75%           NaN  36.250000       NaN  72647.500000
max           NaN  40.000000       NaN  79990.000000


In [2]:
# Monthly sales and expense figures, one row per month.
data = {
    'Month': ['Jan', 'Feb', 'Mar', 'Apr'],
    'Sales': [5000, 6000, 7500, 8000],
    'Expenses': [3000, 3500, 4000, 4500],
}
sales_and_expenses = pd.DataFrame(data)

# Look each numeric column up once; every statistic below reads from these.
sales_column = sales_and_expenses['Sales']
expenses_column = sales_and_expenses['Expenses']

# Compute all the statistics first, then report them group by group.
max_sales, max_expenses = sales_column.max(), expenses_column.max()
min_sales, min_expenses = sales_column.min(), expenses_column.min()
avg_sales, avg_expenses = sales_column.mean(), expenses_column.mean()

# Largest monthly value of each series
print("\nMax Sales:", max_sales)
print("Max Expenses:", max_expenses)

# Smallest monthly value of each series
print("\nMin Sales:", min_sales)
print("Min Expenses:", min_expenses)

# Arithmetic mean over the four months
print("\nAverage Sales:", avg_sales)
print("Average Expenses:", avg_expenses)



Max Sales: 8000
Max Expenses: 4500

Min Sales: 5000
Min Expenses: 3000

Average Sales: 6625.0
Average Expenses: 3750.0


In [3]:
# Monthly spend per expense category (one row per category, one column per month).
data = {
    'Category': ['Rent', 'Utilities', 'Groceries', 'Entertainment'],
    'January': [1200, 200, 300, 150],
    'February': [1300, 220, 320, 160],
    'March': [1400, 240, 330, 170],
    'April': [1500, 250, 350, 180],
}

# Build the frame and promote 'Category' to the index in one step, so that the
# row-wise reductions below are labelled by category.
expenses = pd.DataFrame(data).set_index('Category')

# Each report is a heading plus a reduction taken across the month columns
# (axis=1), i.e. one value per category.
row_summaries = (
    ("\nMaximum expense per category:", expenses.max),
    ("\nMinimum expense per category:", expenses.min),
    ("\nAverage expense per category:", expenses.mean),
)
for heading, summarise in row_summaries:
    print(heading)
    print(summarise(axis=1))



Maximum expense per category:
Category
Rent             1500
Utilities         250
Groceries         350
Entertainment     180
dtype: int64

Minimum expense per category:
Category
Rent             1200
Utilities         200
Groceries         300
Entertainment     150
dtype: int64

Average expense per category:
Category
Rent             1350.0
Utilities         227.5
Groceries         325.0
Entertainment     165.0
dtype: float64
