In [63]:
# working with numpy libraries
import numpy as np
import pandas as pd

In [64]:
arr = np.array([10, 20, 30, 40, 50])
print("Array:", arr)
print("Number Average:", arr.mean())
print("Total number:", arr.sum())
print("Square of the element:", arr ** 2)

Array: [10 20 30 40 50]
Number Average: 30.0
Total number: 150
Square of the element: [ 100  400  900 1600 2500]


In [65]:
# woking with students score using numpy array
scores = np.array([
    [78, 85, 90],
    [88, 92, 80],
    [70, 75, 85],
    [95, 90, 88],
    [60, 65, 70],
    [85, 80, 82],
    [72, 78, 88],
    [90, 95, 92]
])

print("\nStudent Scores\n", scores)

#Average score per student
print("\nAverage score per student\n", scores.mean(axis=1))

#Average score per subject
print("\nAverage score per subject\n", scores.mean(axis=0))

#Highest score per subject
highest_score = scores.max(axis=0)
print("\nHighest score per subject\n", highest_score)


Student Scores
 [[78 85 90]
 [88 92 80]
 [70 75 85]
 [95 90 88]
 [60 65 70]
 [85 80 82]
 [72 78 88]
 [90 95 92]]

Average score per student
 [84.33333333 86.66666667 76.66666667 91.         65.         82.33333333
 79.33333333 92.33333333]

Average score per subject
 [79.75  82.5   84.375]

Highest score per subject
 [95 95 92]


In [66]:
science_score = scores[:, 2]
score_above_90 = np.where(science_score > 90)[0]
print("\nStudent science above 90\n", score_above_90 + 1)


Student science above 90
 [8]


In [76]:
# working pandas dataframe
#sample dataset
data = {
    "Name": ["Alice", "Bob", "Charlie", "David", "Eve"],
    "Department": ["HR", "IT", "IT", "Finance", "HR"],
    "Salary": [50000, 60000, 55000, 65000, np.nan],
    "Age": [25, np.nan, 28, 40, 35]
}

df = pd.DataFrame(data)
print("\DataFrame\n", df)

\DataFrame
       Name Department   Salary   Age
0    Alice         HR  50000.0  25.0
1      Bob         IT  60000.0   NaN
2  Charlie         IT  55000.0  28.0
3    David    Finance  65000.0  40.0
4      Eve         HR      NaN  35.0


In [77]:
#filter employee that's greater 30
filter_df = df[df['Age'] > 30]
print(filter_df)

    Name Department   Salary   Age
3  David    Finance  65000.0  40.0
4    Eve         HR      NaN  35.0


In [78]:
# average salary by department
salary_grouped = df.groupby('Department')['Salary'].mean()
print("\Average salary by department\n", salary_grouped)

\Average salary by department
 Department
Finance    65000.0
HR         50000.0
IT         57500.0
Name: Salary, dtype: float64


In [79]:
#working with merging data
salary_bonus = pd.DataFrame({
    "Department": ["HR", "IT", "Finance"],
    "Salary_bonus": [5000, 4000, 3000]
})

shift_hrs = pd.DataFrame({
    "Department": ["HR", "IT", "Finance"],
    "Shift_Hrs": [5, 4, 3]
})


salary_bonus
shift_hrs

Unnamed: 0,Department,Shift_Hrs
0,HR,5
1,IT,4
2,Finance,3


In [80]:
df_2 = pd.merge(df, salary_bonus, on="Department")
df_2

Unnamed: 0,Name,Department,Salary,Age,Salary_bonus
0,Alice,HR,50000.0,25.0,5000
1,Bob,IT,60000.0,,4000
2,Charlie,IT,55000.0,28.0,4000
3,David,Finance,65000.0,40.0,3000
4,Eve,HR,,35.0,5000


In [81]:
new_df = pd.merge(df_2, shift_hrs, on="Department")
new_df

Unnamed: 0,Name,Department,Salary,Age,Salary_bonus,Shift_Hrs
0,Alice,HR,50000.0,25.0,5000,5
1,Bob,IT,60000.0,,4000,4
2,Charlie,IT,55000.0,28.0,4000,4
3,David,Finance,65000.0,40.0,3000,3
4,Eve,HR,,35.0,5000,5


In [82]:
print("\Detecting Missing value\n")
print("Missing value\n", new_df.isnull().sum())

\Detecting Missing value

Missing value
 Name            0
Department      0
Salary          1
Age             1
Salary_bonus    0
Shift_Hrs       0
dtype: int64


In [83]:
new_df["Salary"].fillna(new_df["Salary"].mean(), inplace=True)
new_df

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  new_df["Salary"].fillna(new_df["Salary"].mean(), inplace=True)


Unnamed: 0,Name,Department,Salary,Age,Salary_bonus,Shift_Hrs
0,Alice,HR,50000.0,25.0,5000,5
1,Bob,IT,60000.0,,4000,4
2,Charlie,IT,55000.0,28.0,4000,4
3,David,Finance,65000.0,40.0,3000,3
4,Eve,HR,57500.0,35.0,5000,5


In [84]:
#working with pandas dropna
new_df.dropna(inplace=True)
new_df

Unnamed: 0,Name,Department,Salary,Age,Salary_bonus,Shift_Hrs
0,Alice,HR,50000.0,25.0,5000,5
2,Charlie,IT,55000.0,28.0,4000,4
3,David,Finance,65000.0,40.0,3000,3
4,Eve,HR,57500.0,35.0,5000,5
