In [19]:
# Basics of Python - Part 3
# Question 1: Data Filtering

# Employee roster shared by every question in this part: one dict per person.
employees = [
    {"name": "Alice", "salary": 60000, "performance_score": 90, "department": "Finance"},
    {"name": "Bob", "salary": 50000, "performance_score": 75, "department": "HR"},
    {"name": "Charlie", "salary": 70000, "performance_score": 85, "department": "IT"},
    {"name": "Diana", "salary": 65000, "performance_score": 92, "department": "Finance"},
    {"name": "Ethan", "salary": 40000, "performance_score": 60, "department": "HR"}
]

# Collect the names of employees who clear both thresholds.
# Both comparisons are strict, so a salary of exactly 60000 or a score of
# exactly 85 does not qualify.
filtered_employees = []
for record in employees:
    if record["salary"] > 60000 and record["performance_score"] > 85:
        filtered_employees.append(record["name"])

print("Employees with salary greater than 60000 and performance score greater than 85:", filtered_employees)

Employees with salary greater than 60000 and performance score greater than 85: ['Diana']


In [21]:
# Question 2: Department-wise Analysis

# Running salary sum and head count, both keyed by department name
total = {}
count = {}

for emp in employees:
    dept = emp["department"]
    # dict.get supplies 0 the first time a department is seen
    total[dept] = total.get(dept, 0) + emp["salary"]
    count[dept] = count.get(dept, 0) + 1

# Average salary per department (sum divided by head count), in first-seen order
for dept, dept_sum in total.items():
    avg = dept_sum / count[dept]
    print(dept, "average salary:", round(avg, 2))

Finance average salary: 62500.0
HR average salary: 45000.0
IT average salary: 70000.0


In [23]:
# Question 3: Employee Ranking

# Sort a shallow copy of the roster by performance score, highest first.
# The sort is stable, so employees with equal scores keep their roster order.
ranked = list(employees)
ranked.sort(key=lambda record: record["performance_score"], reverse=True)

# Top 3 performers
print("Top 3 performing employees:")
top_three = ranked[:3]
for emp in top_three:
    print(emp["name"], "-", emp["performance_score"])

Top 3 performing employees:
Diana - 92
Alice - 90
Charlie - 85


In [25]:
# Question 4: Condition-Based Salary Increment

# Give a 10% raise (rounded to a whole number) to HR employees whose
# performance score is above 70.
# The salaries are overwritten in place, so executing this cell a second time
# applies the raise a second time.
for emp in employees:
    in_hr = emp["department"] == "HR"
    if not (in_hr and emp["performance_score"] > 70):
        continue
    emp["salary"] = round(emp["salary"] * 1.10)

print("Updated employee data:")
for emp in employees:
    print(emp)

Updated employee data:
{'name': 'Alice', 'salary': 60000, 'performance_score': 90, 'department': 'Finance'}
{'name': 'Bob', 'salary': 55000, 'performance_score': 75, 'department': 'HR'}
{'name': 'Charlie', 'salary': 70000, 'performance_score': 85, 'department': 'IT'}
{'name': 'Diana', 'salary': 65000, 'performance_score': 92, 'department': 'Finance'}
{'name': 'Ethan', 'salary': 40000, 'performance_score': 60, 'department': 'HR'}


In [27]:
# Question 5: Nested Conditions for Bonus Allocation

# Bonus = rate * current salary, with the rate chosen by performance band:
#   score above 90          -> 15%
#   score from 80 up to 90  -> 10%
#   anything lower          ->  5%
for emp in employees:
    score = emp["performance_score"]

    if score > 90:
        rate = 0.15
    elif score >= 80:  # the first branch already took everything above 90
        rate = 0.10
    else:
        rate = 0.05

    bonus = rate * emp["salary"]
    print(f"{emp['name']} - Bonus: ${round(bonus, 2)}")

Alice - Bonus: $6000.0
Bob - Bonus: $2750.0
Charlie - Bonus: $7000.0
Diana - Bonus: $9750.0
Ethan - Bonus: $2000.0


In [39]:
# Question 6: Custom Grading System

# Map each performance score to a grade label:
#   above 90 -> "Excellent", 80 to 90 inclusive -> "Good", below 80 -> "Needs Improvement"
print("Performance Grades:")
for emp in employees:
    score = emp["performance_score"]
    grade = (
        "Excellent" if score > 90
        else "Good" if score >= 80
        else "Needs Improvement"
    )
    print(f"{emp['name']} - Grade: {grade}")

Performance Grades:
Alice - Grade: Good
Bob - Grade: Needs Improvement
Charlie - Grade: Good
Diana - Grade: Excellent
Ethan - Grade: Needs Improvement


In [31]:
# Question 7: Aggregation Across Departments

# Sum the current salaries per department.
dept_salary = {}
for emp in employees:
    dept = emp["department"]
    # dict.get supplies 0 the first time a department is seen
    dept_salary[dept] = dept_salary.get(dept, 0) + emp["salary"]

print("Total salary expenditure per department:")
# The loop variable is deliberately not called `total`: that name is the
# per-department dict built in Question 2, and reusing it here would silently
# replace that dict with an int.
for dept, dept_total in dept_salary.items():
    print(f"{dept}: ${dept_total}")

# max() over the dict iterates its keys; key=dept_salary.get ranks them by their sum.
highest_dept = max(dept_salary, key=dept_salary.get)
print(f"\nThe Department with highest expenditure: {highest_dept} (${dept_salary[highest_dept]})")

Total salary expenditure per department:
Finance: $125000
HR: $95000
IT: $70000

The Department with highest expenditure: Finance ($125000)


In [33]:
# Question 8: Data Transformation for Visualization

# Build {department: [performance scores...]}, keeping roster order inside each list.
dept_scores = {}
for emp in employees:
    # setdefault creates the empty list the first time a department appears
    dept_scores.setdefault(emp["department"], []).append(emp["performance_score"])

print(dept_scores)

{'Finance': [90, 92], 'HR': [75, 60], 'IT': [85]}


In [35]:
# Question 9: Loan Eligibility Assessment

# An employee is eligible when the performance score is above 80 AND the
# salary is below 65000 (both comparisons strict). The flag is stored on each
# employee dict under the "loan_eligible" key.
for emp in employees:
    emp["loan_eligible"] = emp["performance_score"] > 80 and emp["salary"] < 65000

for emp in employees:
    print(emp)

{'name': 'Alice', 'salary': 60000, 'performance_score': 90, 'department': 'Finance', 'loan_eligible': True}
{'name': 'Bob', 'salary': 55000, 'performance_score': 75, 'department': 'HR', 'loan_eligible': False}
{'name': 'Charlie', 'salary': 70000, 'performance_score': 85, 'department': 'IT', 'loan_eligible': False}
{'name': 'Diana', 'salary': 65000, 'performance_score': 92, 'department': 'Finance', 'loan_eligible': False}
{'name': 'Ethan', 'salary': 40000, 'performance_score': 60, 'department': 'HR', 'loan_eligible': False}


In [41]:
# Question 10: Cross Filtering For Savings Analysis

# Savings threshold (same units as salary)
threshold = 12000

# Report every employee whose savings, taken as 20% of salary, strictly
# exceed the threshold. The amount in the message is formatted from
# `threshold` (with a thousands separator) so the text cannot drift out of
# sync with the comparison if the threshold is changed.
for emp in employees:
    savings = emp["salary"] * 0.20
    if savings > threshold:
        print(f"{emp['name']} from {emp['department']} has savings > ${threshold:,} annually.")

Charlie from IT has savings > $12,000 annually.
Diana from Finance has savings > $12,000 annually.


In [83]:
# Pandas Library - Part 1
# Question 1: Data Loading & Structure

import pandas as pd

# Column-oriented weather observations: one entry per city in every list.
weather_data = {
    "City": ["New York", "Los Angeles", "Chicago", "Houston", "Phoenix"],
    "Temperature (°F)": [85, 90, 78, 92, 104],
    "Humidity (%)": [70, 50, 60, 55, 40],
    "Rainfall (inches)": [1.2, 0.5, 2.3, 0.8, 0.0],
    "Date": ["2025-01-01", "2025-01-01", "2025-01-01", "2025-01-01", "2025-01-01"]
}

# Creating DataFrame
df = pd.DataFrame(weather_data)

# Displaying the first three rows
print("First three rows:")
print(df.head(3))
print("\n")

# Display the structure of the DataFrame and the data types.
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
print("DataFrame structure:")
df.info()

First three rows:
          City  Temperature (°F)  Humidity (%)  Rainfall (inches)        Date
0     New York                85            70                1.2  2025-01-01
1  Los Angeles                90            50                0.5  2025-01-01
2      Chicago                78            60                2.3  2025-01-01


DataFrame structure:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   City               5 non-null      object 
 1   Temperature (°F)   5 non-null      int64  
 2   Humidity (%)       5 non-null      int64  
 3   Rainfall (inches)  5 non-null      float64
 4   Date               5 non-null      object 
dtypes: float64(1), int64(2), object(2)
memory usage: 332.0+ bytes
None


In [57]:
# Question 2: Temperature Analysis

temperatures = df["Temperature (°F)"]

average_temperature = temperatures.mean()

# idxmax()/idxmin() return the index label of the extreme reading; that label
# is then used to look up the matching city name.
hottest_label = temperatures.idxmax()
coldest_label = temperatures.idxmin()
highest_temp_city = df.loc[hottest_label, "City"]
lowest_temp_city = df.loc[coldest_label, "City"]

print(f"Average temperature: {average_temperature}°F")
print(f"City with the highest temperature: {highest_temp_city}")
print(f"City with the lowest temperature: {lowest_temp_city}")

Average temperature: 89.8°F
City with the highest temperature: Phoenix
City with the lowest temperature: Chicago


In [61]:
# Question 3: Filtering Data

# Keep the cities whose temperature is at least 80°F and whose humidity is at
# most 60% (both bounds inclusive).
# NOTE(review): confirm the intended directions/thresholds against the assignment text.
is_warm = df["Temperature (°F)"] >= 80
is_not_humid = df["Humidity (%)"] <= 60
filtered_df = df.loc[is_warm & is_not_humid]

print(filtered_df)

          City  Temperature (°F)  Humidity (%)  Rainfall (inches)        Date
1  Los Angeles                90            50                0.5  2025-01-01
3      Houston                92            55                0.8  2025-01-01
4      Phoenix               104            40                0.0  2025-01-01


In [93]:
# Question 4: Date Formatting

# Parse the 'Date' column once and reuse the parsed values for every derived column
parsed_dates = pd.to_datetime(df["Date"])
df["Date"] = parsed_dates

# Year and Month as new integer columns
df["Year"] = parsed_dates.dt.year
df["Month"] = parsed_dates.dt.month

print(df)

          City  Temperature (°F)  Humidity (%)  Rainfall (inches)       Date  \
0     New York                85            70                1.2 2025-01-01   
1  Los Angeles                90            50                0.5 2025-01-01   
2      Chicago                78            60                2.3 2025-01-01   
3      Houston                92            55                0.8 2025-01-01   
4      Phoenix               104            40                0.0 2025-01-01   

   Year  Month  \nYear  
0  2025      1    2025  
1  2025      1    2025  
2  2025      1    2025  
3  2025      1    2025  
4  2025      1    2025  


In [95]:
# Question 4: Date Formatting
# (This cell repeats the Question 4 cell directly before it.)

# Make sure 'Date' holds datetime values
df["Date"] = pd.to_datetime(df["Date"])

# Derive one new column per calendar component from the .dt accessor
for column, part in (("Year", "year"), ("Month", "month")):
    df[column] = getattr(df["Date"].dt, part)

print(df)

          City  Temperature (°F)  Humidity (%)  Rainfall (inches)       Date  \
0     New York                85            70                1.2 2025-01-01   
1  Los Angeles                90            50                0.5 2025-01-01   
2      Chicago                78            60                2.3 2025-01-01   
3      Houston                92            55                0.8 2025-01-01   
4      Phoenix               104            40                0.0 2025-01-01   

   Year  Month  \nYear  
0  2025      1    2025  
1  2025      1    2025  
2  2025      1    2025  
3  2025      1    2025  
4  2025      1    2025  


In [67]:
#Question 5: Humidity Categorization

# Adding a new column called Humidity Level
def categorize_humidity(humidity):
    """Return the humidity band label for a single humidity reading.

    Args:
        humidity: A numeric humidity value.

    Returns:
        "High" when the value is above 60, "Moderate" when it is between
        40 and 60 inclusive, and "Low" otherwise.
    """
    if humidity > 60:
        return "High"
    # Everything reaching this point is not above 60, so only the lower bound remains.
    if humidity >= 40:
        return "Moderate"
    return "Low"

# Label every humidity reading with its band and store it as a new column
humidity_readings = df["Humidity (%)"]
df["Humidity Level"] = humidity_readings.map(categorize_humidity)

print(df)

          City  Temperature (°F)  Humidity (%)  Rainfall (inches)       Date  \
0     New York                85            70                1.2 2025-01-01   
1  Los Angeles                90            50                0.5 2025-01-01   
2      Chicago                78            60                2.3 2025-01-01   
3      Houston                92            55                0.8 2025-01-01   
4      Phoenix               104            40                0.0 2025-01-01   

   Year  Month Humidity Level  
0  2025      1           High  
1  2025      1       Moderate  
2  2025      1       Moderate  
3  2025      1       Moderate  
4  2025      1       Moderate  


In [71]:
# Question 6: Correlation Analysis

temperature = df["Temperature (°F)"]
humidity = df["Humidity (%)"]

# Series.corr with its default settings computes the Pearson correlation coefficient
correlation = temperature.corr(humidity)

print(f"Correlation coefficient between Temperature and Humidity: {correlation}")

Correlation coefficient between Temperature and Humidity: -0.8034125966362735


In [73]:
# Question 7: Rainfall Insights

# "Rainy City" is "Yes" when rainfall is strictly above 1 inch, otherwise "No".
# The comparison is done on the whole column at once and the resulting
# booleans are translated to labels.
is_rainy = df["Rainfall (inches)"].gt(1)
df["Rainy City"] = is_rainy.map({True: "Yes", False: "No"})

print(df)

          City  Temperature (°F)  Humidity (%)  Rainfall (inches)        Date  \
0     New York                85            70                1.2  2025-01-01   
1  Los Angeles                90            50                0.5  2025-01-01   
2      Chicago                78            60                2.3  2025-01-01   
3      Houston                92            55                0.8  2025-01-01   
4      Phoenix               104            40                0.0  2025-01-01   

  Rainy City  
0        Yes  
1         No  
2        Yes  
3         No  
4         No  


In [75]:
# Question 8: Statistical Summary

# Columns to summarise; describe() reports count, mean, std, min, quartiles and max
measurement_columns = ["Temperature (°F)", "Humidity (%)", "Rainfall (inches)"]
summary = df[measurement_columns].describe()

print(summary)

       Temperature (°F)  Humidity (%)  Rainfall (inches)
count          5.000000       5.00000           5.000000
mean          89.800000      55.00000           0.960000
std            9.602083      11.18034           0.867756
min           78.000000      40.00000           0.000000
25%           85.000000      50.00000           0.500000
50%           90.000000      55.00000           0.800000
75%           92.000000      60.00000           1.200000
max          104.000000      70.00000           2.300000


In [77]:
# Question 9: Data Grouping

def _rain_label(inches):
    """Return "Yes" for rainfall strictly above 1 inch, otherwise "No"."""
    return "Yes" if inches > 1 else "No"

# (Re)compute the flag here so this cell does not depend on an earlier one having run
df["Rainy City"] = df["Rainfall (inches)"].apply(_rain_label)

# Mean temperature and humidity for the rainy and the non-rainy group
by_rain_flag = df.groupby("Rainy City")
grouped = by_rain_flag[["Temperature (°F)", "Humidity (%)"]].mean()

# Display the results
print(grouped)

            Temperature (°F)  Humidity (%)
Rainy City                                
No                 95.333333     48.333333
Yes                81.500000     65.000000


In [87]:
# Question 10: Data Visualization Preparation

# One row per city with its total rainfall and average temperature.
# Each keyword is an output column name mapped to (source column, aggregation).
per_city = df.groupby("City")
summary_df = per_city.agg(
    Total_Rainfall=("Rainfall (inches)", "sum"),
    Average_Temperature=("Temperature (°F)", "mean"),
)
# Turn the "City" group keys back into an ordinary column
summary_df = summary_df.reset_index()

print(summary_df)

          City  Total_Rainfall  Average_Temperature
0      Chicago             2.3                 78.0
1      Houston             0.8                 92.0
2  Los Angeles             0.5                 90.0
3     New York             1.2                 85.0
4      Phoenix             0.0                104.0
