In [3]:
#Task01
import pandas as pd
import numpy as np


In [4]:
# Build a named one-dimensional Series holding 10, 20, 30, 40 and display it.
series = pd.Series(data=list(range(10, 50, 10)), name="Sample Series")
print(series)


0    10
1    20
2    30
3    40
Name: Sample Series, dtype: int64


In [7]:
# Column-oriented input: each key becomes a column, each list supplies its values.
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 22],
    City=['NY', 'LA', 'Chicago'],
)
df = pd.DataFrame(data)
print(df)


      Name  Age     City
0    Alice   25       NY
1      Bob   30       LA
2  Charlie   22  Chicago


In [10]:
# Persist the frame as an Excel workbook in the working directory;
# index=False keeps the row labels out of the sheet.
df.to_excel(excel_writer="sample_data.xlsx", index=False)


In [9]:
# Quick inspection: first record, last record, structural summary, statistics.
print(f"First row:\n {df.head(1)}")
print(f"Last row:\n {df.tail(1)}")
print("Info:\n")
df.info()  # prints its report itself, so it is not wrapped in print()
# include='all' also summarises the non-numeric columns (count/unique/top/freq).
print(f"Description:\n {df.describe(include='all')}")


First row:
     Name  Age City
0  Alice   25   NY
Last row:
       Name  Age     City
2  Charlie   22  Chicago
Info:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    3 non-null      object
 1   Age     3 non-null      int64 
 2   City    3 non-null      object
dtypes: int64(1), object(2)
memory usage: 204.0+ bytes
Description:
          Name        Age City
count       3   3.000000    3
unique      3        NaN    3
top     Alice        NaN   NY
freq        1        NaN    1
mean      NaN  25.666667  NaN
std       NaN   4.041452  NaN
min       NaN  22.000000  NaN
25%       NaN  23.500000  NaN
50%       NaN  25.000000  NaN
75%       NaN  27.500000  NaN
max       NaN  30.000000  NaN


In [11]:
# Demo frame with gaps: one NaN in column A, two in column B, none in column C.
df_missing = pd.DataFrame(
    dict(
        A=[1, 2, np.nan, 4],
        B=[5, np.nan, np.nan, 8],
        C=[9, 10, 11, 12],
    )
)
print(df_missing)


     A    B   C
0  1.0  5.0   9
1  2.0  NaN  10
2  NaN  NaN  11
3  4.0  8.0  12


In [12]:
# Three treatments of the gaps, printed one after another:
#   dropna()      - discard every row containing at least one NaN
#   fillna(0)     - replace each NaN with the constant 0
#   interpolate() - fill each NaN by linear interpolation along the column
# None of them modifies df_missing; each call returns a new frame.
print(
    df_missing.dropna(),
    df_missing.fillna(0),
    df_missing.interpolate(),
    sep="\n",
)


     A    B   C
0  1.0  5.0   9
3  4.0  8.0  12
     A    B   C
0  1.0  5.0   9
1  2.0  0.0  10
2  0.0  0.0  11
3  4.0  8.0  12
     A    B   C
0  1.0  5.0   9
1  2.0  6.0  10
2  3.0  7.0  11
3  4.0  8.0  12


In [13]:
# Relabel the 'Name' column as 'FullName'. rename() returns a new frame,
# so df itself keeps its original column labels.
df_renamed = df.rename({'Name': 'FullName'}, axis='columns')
print(df_renamed)


  FullName  Age     City
0    Alice   25       NY
1      Bob   30       LA
2  Charlie   22  Chicago


In [14]:
# Convert Age from integer to 64-bit float and store the result back in df,
# then list the dtype of every column to confirm the change.
df['Age'] = df['Age'].astype('float64')
print(df.dtypes)


Name     object
Age     float64
City     object
dtype: object


In [15]:
# Derived column: every age shifted up by ten (element-wise addition).
df['AgePlusTen'] = df['Age'].add(10)
print(df)


      Name   Age     City  AgePlusTen
0    Alice  25.0       NY        35.0
1      Bob  30.0       LA        40.0
2  Charlie  22.0  Chicago        32.0


In [16]:
#Task02
import pandas as pd
import numpy as np


In [17]:
# Seed NumPy's global generator first so the random scores are reproducible.
np.random.seed(1)

# Five records: sequential IDs, random integer scores drawn from [50, 100),
# and a gender label. Note that this rebinds `df`, replacing the Task01 frame.
df = pd.DataFrame(
    dict(
        ID=np.arange(1, 6),
        Score=np.random.randint(50, 100, 5),
        Gender=['Male', 'Female', 'Female', 'Male', 'Female'],
    )
)
print(df)


   ID  Score  Gender
0   1     87    Male
1   2     93  Female
2   3     62  Female
3   4     58    Male
4   5     59  Female


In [18]:
# Cell-by-cell missing-value mask, followed by the number of missing
# entries in each column.
print(df.isna())
print(f"Total missing values:\n {df.isna().sum()}")


      ID  Score  Gender
0  False  False   False
1  False  False   False
2  False  False   False
3  False  False   False
4  False  False   False
Total missing values:
 ID        0
Score     0
Gender    0
dtype: int64


In [19]:
# Score is created from random integers, and an integer column cannot hold
# NaN. Cast it to float explicitly before injecting the gap: relying on the
# implicit int -> float upcast done by the .loc assignment is deprecated in
# recent pandas releases (FutureWarning) and is slated to become an error.
df['Score'] = df['Score'].astype(float)

# Add a NaN at row label 2 for the example
df.loc[2, 'Score'] = np.nan

# Fill with the column mean (mean() skips NaN by default)
df['Score_filled'] = df['Score'].fillna(df['Score'].mean())

# Drop rows that still contain a missing value (the row whose Score is NaN);
# df itself is left unchanged
df_dropped = df.dropna()

# Interpolate linearly between the neighbouring scores
df['Score_interp'] = df['Score'].interpolate()


In [20]:
# Derived column: the mean-filled score divided by 100.
df['Score_scaled'] = df['Score_filled'].div(100)


In [21]:
# Lookup table assigning a letter grade to each ID.
df2 = pd.DataFrame({
    'ID': [1, 2, 3, 4, 5],
    'Grade': ['A', 'B', 'C', 'A', 'B']
})

# Concatenation (column-wise): the frames are placed side by side and aligned
# on the row index. Both inputs carry an 'ID' column, so it appears twice in
# the result.
concat_df = pd.concat([df, df2], axis='columns')

# Merging on ID: rows are matched by the value of the shared 'ID' column,
# which appears only once in the result.
merged_df = pd.merge(df, df2, on='ID')

# Joining (setting index first): join() matches on the index, so 'ID' is
# moved into the index of both frames beforehand.
df3 = df.set_index('ID')
df4 = df2.set_index('ID')
joined_df = df3.join(df4)


In [22]:
# Split the frame by gender; the GroupBy object is reused below.
grouped = df.groupby('Gender')

# Iterating a GroupBy yields (group key, sub-frame) pairs.
for name, group in grouped:
    print(f"\n{name} Group:\n {group}")

# Average of the mean-filled score within each gender.
print(f"Mean by gender:\n {grouped['Score_filled'].mean()}")



Female Group:
    ID  Score  Gender  Score_filled  Score_interp  Score_scaled
1   2   93.0  Female         93.00          93.0        0.9300
2   3    NaN  Female         74.25          75.5        0.7425
4   5   59.0  Female         59.00          59.0        0.5900

Male Group:
    ID  Score Gender  Score_filled  Score_interp  Score_scaled
0   1   87.0   Male          87.0          87.0          0.87
3   4   58.0   Male          58.0          58.0          0.58
Mean by gender:
 Gender
Female    75.416667
Male      72.500000
Name: Score_filled, dtype: float64


In [23]:
# Mean, minimum and maximum of the filled score for each gender,
# returned as one column per statistic.
print(
    df.groupby('Gender')['Score_filled']
      .aggregate(['mean', 'min', 'max'])
)


             mean   min   max
Gender                       
Female  75.416667  59.0  93.0
Male    72.500000  58.0  87.0


In [24]:
# Pivot table with one row per gender holding the mean of Score_filled.
pivot = df.pivot_table(values='Score_filled', index='Gender', aggfunc='mean')
print(pivot)


        Score_filled
Gender              
Female     75.416667
Male       72.500000


In [25]:
# Different statistics per column: passing a mapping of column -> list of
# functions produces two-level column labels (column name, statistic).
summary = df.groupby('Gender').aggregate(
    dict(
        Score_filled=['mean', 'std'],
        Score_scaled=['min', 'max'],
    )
)
print(summary)


       Score_filled            Score_scaled      
               mean        std          min   max
Gender                                           
Female    75.416667  17.029998         0.59  0.93
Male      72.500000  20.506097         0.58  0.87


In [28]:
# Per-gender mean of every numeric column. numeric_only=True keeps this cell
# working if a non-numeric column is ever added to df: pandas 2.0+ no longer
# drops such columns silently and raises TypeError instead. With the current
# columns (all numeric apart from the grouping key) the output is unchanged.
print(df.groupby('Gender').mean(numeric_only=True))

              ID  Score  Score_filled  Score_interp  Score_scaled
Gender                                                           
Female  3.333333   76.0     75.416667     75.833333      0.754167
Male    2.500000   72.5     72.500000     72.500000      0.725000
