**1. astype()**

a. Convert the column age in a DataFrame to float64:

In [None]:
import pandas as pd

# Build a frame with integer ages, then cast only the 'age' column to float64.
df = pd.DataFrame(data={'age': [25, 30, 45]})
df = df.astype({'age': 'float64'})
print(df)


b. Change a Pandas Series of integers into strings using astype():

In [None]:
# Create the integer Series and convert it to strings in one chained expression.
series = pd.Series(data=[1, 2, 3]).astype('str')
print(series)


c. Convert a DataFrame containing numeric values into int32 datatype:

In [None]:
# Cast every column of the float frame to int32 (the fractional part is dropped).
df = pd.DataFrame(
    data=[
        [1.5, 2.3],
        [3.1, 4.8],
    ],
).astype(dtype='int32')
print(df)


**2. at property**

a. Access the value in row 2 and column 'age' using the at property:

In [None]:
# Two-column frame with the default integer row labels 0..2.
df = pd.DataFrame(
    data={
        'age': [25, 30, 45],
        'name': ['Alice', 'Bob', 'Charlie'],
    },
)
# .at reads one scalar addressed by (row label, column label).
value = df.at[2, 'age']
print(value)


b. Update the value at a specific row and column using at property:

In [None]:
# Assigning through .at overwrites the single cell at row label 2, column 'age'.
# Relies on the `df` created in the previous cell.
df.at[2, 'age'] = 50
print(df)


c. Use at to access and change multiple values in a DataFrame:

In [None]:
# .at addresses one cell per call, so changing several values takes several assignments.
# Relies on the `df` (columns 'age' and 'name') from the earlier cells.
df.at[0, 'age'] = 28
df.at[1, 'name'] = 'Robert'
print(df)


**3. bfill()**

a. Create a DataFrame with some missing values and fill them using bfill():

In [None]:
# Each column has one missing entry; bfill() pulls the next valid value upwards.
df = pd.DataFrame(
    data={
        'A': [1, None, 3],
        'B': [4, 5, None],
    },
)
df_bfilled = df.bfill()
print(df_bfilled)


b. Demonstrate how bfill() works with multiple columns in a DataFrame:

In [None]:
# Three columns with gaps in different positions; bfill() works on each column independently.
df = pd.DataFrame(
    data={
        'A': [1, None, 3],
        'B': [None, 5, 6],
        'C': [7, None, None],
    },
)
df_bfilled = df.bfill()
print(df_bfilled)


c. Show how to limit the number of bfill() operations to one row:

In [None]:
# limit=1 caps the back-fill at one consecutive NaN per gap.
# Relies on the three-column `df` from the previous cell.
df_bfilled = df.bfill(limit=1)
print(df_bfilled)


**4. columns property**

a. Display the column names of a DataFrame using the columns property:

In [None]:
# The columns property exposes the column labels as an Index.
df = pd.DataFrame(
    data={
        'age': [25, 30],
        'name': ['Alice', 'Bob'],
    },
)
print(df.columns)


b. Rename columns in a DataFrame by directly modifying the columns property:

In [None]:
# Assigning a list to df.columns renames the columns positionally.
# Relies on the two-column `df` from the previous cell.
df.columns = ['years', 'person_name']
print(df)


c. Add a prefix to all column names using columns:

In [None]:
# Start from the plain column names, then rebuild each label with the 'col_' prefix.
df.columns = ['years', 'person_name']
df.columns = [f'col_{label}' for label in df.columns]
print(df)


**5. combine()**

a. Combine two DataFrames by comparing each element and selecting the larger element:

In [None]:
import pandas as pd
import numpy as np
# Two same-shaped frames; every value in df2 is larger than its df1 counterpart.
df1 = pd.DataFrame(data=[[1, 2], [3, 4]])
df2 = pd.DataFrame(data=[[5, 6], [7, 8]])

# The callback receives one pair of matching columns at a time and
# keeps the larger value in each position.
result = df1.combine(
    df2,
    lambda a, b: np.where(a > b, a, b),
)
print(result)


b. Create a function that performs a custom operation while combining two DataFrames:

In [None]:
import numpy as np

def myfunc(a, b):
    """Return twice the larger of *a* and *b*, element-wise.

    Where ``a > b`` the doubled value of *a* is used; otherwise (including
    ties) the doubled value of *b* is used.
    """
    doubled_a = a * 2
    doubled_b = b * 2
    return np.where(a > b, doubled_a, doubled_b)

# Pass the named function as the combiner.
# Relies on `df1` and `df2` from the previous cell and `myfunc` defined above.
df_combined = df1.combine(df2, myfunc)
print(df_combined)



c. Combine two DataFrames column-wise and handle NaN values using a custom function:

In [None]:
import pandas as pd

df1 = pd.DataFrame([[1, None], [None, 4]])
df2 = pd.DataFrame([[5, 6], [7, None]])


def prefer_first(s1, s2):
    """Keep each value of s1, falling back to s2 where s1 is NaN."""
    return s1.where(s1.notna(), s2)


# combine() calls the custom function once per pair of matching columns,
# so NaNs in df1 are filled from df2 (a position stays NaN only if both are NaN).
df_combined = df1.combine(df2, prefer_first)
print(df_combined)


**6. count()**


a. Count the non-null entries in each column of a DataFrame:


In [None]:
# count() reports, per column, how many entries are not null.
df = pd.DataFrame(
    data={
        'A': [1, None, 3],
        'B': [4, None, None],
    },
)
print(df.count())


b. Count non-null values row-wise in a DataFrame:

In [None]:
# axis='columns' (the same as axis=1) counts the non-null entries in each row.
print(df.count(axis='columns'))


c. Use count() to count occurrences in a grouped DataFrame:

In [None]:
# Group the rows by 'group' and count the non-null 'age' entries in each group.
df = pd.DataFrame(
    data={
        'age': [25, 30, 25],
        'group': ['A', 'B', 'A'],
    },
)
grouped = df.groupby(by='group').count()
print(grouped)


**7. cov()**

a. Compute the covariance between two columns in a DataFrame:

In [None]:
# Series.cov() gives the covariance between one column and another.
df = pd.DataFrame(
    data={
        'A': [1, 2, 3],
        'B': [4, 5, 6],
    },
)
print(df['A'].cov(other=df['B']))


b. Calculate the covariance matrix for an entire DataFrame:

In [None]:
# DataFrame.cov() returns the pairwise covariance matrix of all columns.
# Relies on the `df` from the previous cell.
print(df.cov())


     A    B
A  1.0  1.0
B  1.0  1.0


c. Use cov() to find the covariance between different DataFrames:

In [None]:
import pandas as pd

# Two separate frames; their rows share the default index 0..2.
df1 = pd.DataFrame(
    data={
        'A': [1, 2, 3],
        'B': [4, 5, 6],
    },
)
df2 = pd.DataFrame(data={'C': [7, 8, 9]})

# Covariance of one column from each frame.
cov_result = df1['A'].cov(other=df2['C'])
print("Covariance between df1['A'] and df2['C']:", cov_result)



Covariance between df1['A'] and df2['C']: 1.0


**8. copy()**



a. Create a deep copy of a DataFrame and modify it without affecting the original:

In [None]:
import pandas as pd

# Start from a small single-column frame.
df = pd.DataFrame(data={'A': [1, 2, 3]})

# copy() is deep by default; spelling it out makes the intent explicit.
df_copy = df.copy(deep=True)

# A single .loc[row, column] assignment writes to the copy only.
df_copy.loc[0, 'A'] = 100

# Show both frames: the original keeps its first value.
print("Original DataFrame:", df, sep="\n")
print("\nModified Copy:", df_copy, sep="\n")


Original DataFrame:
   A
0  1
1  2
2  3

Modified Copy:
     A
0  100
1    2
2    3


b. Make a shallow copy of a DataFrame and observe how changes affect both:

In [None]:
import pandas as pd

# Original DataFrame
df = pd.DataFrame({'A': [1, 2, 3]})

# Create a shallow copy of the DataFrame
df_copy = df.copy(deep=False)

# Modify df_copy (this will also affect df)
# NOTE(review): that holds only when the shallow copy shares its data with the
# original. With pandas Copy-on-Write enabled (understood to be the default
# from pandas 3.0) the write goes to a private copy and `df` stays unchanged.
# Confirm against the pandas version in use.
df_copy.loc[0, 'A'] = 100

# Print both DataFrames to observe the effect
print("Original DataFrame:")
print(df)
print("\nShallow Copy:")
print(df_copy)



Original DataFrame:
     A
0  100
1    2
2    3

Shallow Copy:
     A
0  100
1    2
2    3


c. Modify a DataFrame after using copy(deep=False) and explain the behavior:

In [None]:
import pandas as pd

# Original DataFrame
df = pd.DataFrame({'A': [100, 2, 3]})

# Create a shallow copy
df_copy = df.copy(deep=False)

# Modify the shallow copy using .loc[] to avoid the chained assignment warning
# Explanation: copy(deep=False) makes a new DataFrame object but does not copy
# the underlying data, so without Copy-on-Write this write is visible through
# `df` as well.
# NOTE(review): with pandas Copy-on-Write enabled (understood to be the default
# from pandas 3.0) only `df_copy` changes. Confirm against the pandas version
# in use.
df_copy.loc[1, 'A'] = 200

# Print both DataFrames
print("Original DataFrame:")
print(df)
print("\nShallow Copy:")
print(df_copy)



Original DataFrame:
     A
0  100
1  200
2    3

Shallow Copy:
     A
0  100
1  200
2    3


**9. cummax()**


a. Compute the cumulative maximum of a column in a DataFrame:

In [None]:
# The running maximum never decreases: the trailing 1 is reported as 3.
df = pd.DataFrame(data={'A': [1, 2, 3, 1]})
print(df['A'].cummax())


0    1
1    2
2    3
3    3
Name: A, dtype: int64


b. Apply cummax() row-wise across multiple columns:

In [None]:
# axis='columns' (the same as axis=1) accumulates left to right within each row.
df = pd.DataFrame(
    data={
        'A': [1, 2, 3],
        'B': [4, 3, 2],
    },
)
print(df.cummax(axis='columns'))


   A  B
0  1  4
1  2  3
2  3  3


c. Use cummax() to find the cumulative maximum of a Series containing both positive and negative values:

In [None]:
# Negative entries never lower the running maximum.
series = pd.Series(data=[1, -2, 3, -4])
print(series.cummax())


0    1
1    1
2    3
3    3
dtype: int64


**10. explode()**

a. Create a DataFrame where one column contains lists, then use explode() to flatten it:

In [None]:
# Each list element becomes its own row; the original row label is repeated.
df = pd.DataFrame(data={'A': [[1, 2], [3, 4]]})
df_exploded = df.explode(column='A')
print(df_exploded)


   A
0  1
0  2
1  3
1  4


b. Apply explode() to a column with strings and lists mixed together:

In [None]:
# Only the list entry is expanded; the plain string stays as a single row.
df = pd.DataFrame(data={'A': [[1, 2], 'Not a List']})
df_exploded = df.explode(column='A')
print(df_exploded)


            A
0           1
0           2
1  Not a List


c. Use explode() on a multi-index DataFrame:

In [None]:
# Column 'A' holds lists; column 'B' is moved into the index before exploding.
df = pd.DataFrame({'A': [[1, 2], [3, 4]], 'B': ['x', 'y']})
# NOTE(review): set_index(['B']) is given a single key, so the index has one
# level named 'B' rather than the multi-level index the exercise describes.
# A true MultiIndex needs at least two keys.
df.set_index(['B'], inplace=True)
# explode() repeats the index label for every element taken from a list.
df_exploded = df.explode('A')
print(df_exploded)


   A
B   
x  1
x  2
y  3
y  4


**11. ffill()**

a. Create a DataFrame with missing values and fill forward using ffill():

In [None]:
import pandas as pd

# ffill() carries the last valid value downwards; a leading NaN has nothing to copy.
df = pd.DataFrame(
    data={
        'A': [1, None, 3],
        'B': [None, 5, 6],
    },
)
df_filled = df.ffill()
print(df_filled)


     A    B
0  1.0  NaN
1  1.0  5.0
2  3.0  6.0


b. Use ffill() to propagate the last valid observation row-wise:

In [None]:
# axis='columns' (the same as axis=1) propagates values left to right within each row.
df_filled = df.ffill(axis='columns')
print(df_filled)


     A    B
0  1.0  1.0
1  NaN  5.0
2  3.0  6.0


c. Limit the number of forward fills to 1:

In [None]:
# limit=1 fills at most one consecutive NaN after each valid value.
# Relies on the `df` from the first ffill() cell.
df_filled = df.ffill(limit=1)
print(df_filled)


     A    B
0  1.0  NaN
1  1.0  5.0
2  3.0  6.0


**12. isna()**

a. Create a DataFrame and use isna() to detect missing values:


In [None]:
# isna() returns a same-shaped boolean frame: True wherever a value is missing.
df = pd.DataFrame(
    data={
        'A': [1, None, 3],
        'B': [4, None, None],
    },
)
print(df.isna())


       A      B
0  False  False
1   True   True
2  False   True


b. Count the total number of missing values in each column using isna():

In [None]:
# Summing the boolean mask column by column counts the True (missing) entries.
# Relies on the `df` from the previous cell.
print(df.isna().sum())


A    1
B    2
dtype: int64


c. Replace missing values based on the result of isna():

In [None]:
# Use the boolean mask from isna() to choose the cells to replace:
# wherever the mask is True the value becomes 0, everything else is kept.
# Relies on the `df` from the first isna() cell.
df_filled = df.mask(df.isna(), 0)
print(df_filled)


     A    B
0  1.0  4.0
1  0.0  0.0
2  3.0  0.0


**13. items()**

a. Use items() to iterate over each column in a DataFrame:

In [None]:
# items() yields one (column label, column Series) pair per column.
df = pd.DataFrame(
    data={
        'A': [1, 2, 3],
        'B': [4, 5, 6],
    },
)
for col_name, col_data in df.items():
    print(f"Column: {col_name}", col_data, sep="\n")


Column: A
0    1
1    2
2    3
Name: A, dtype: int64
Column: B
0    4
1    5
2    6
Name: B, dtype: int64


b. Modify the values of a DataFrame by iterating with items():

In [None]:
# Walk the (label, Series) pairs and write each column back with its values doubled.
# Relies on the `df` from the previous cell.
for col_name, col_data in df.items():
    df[col_name] = col_data * 2
print(df)


   A   B
0  2   8
1  4  10
2  6  12


c. Combine values from two columns using items():

In [None]:
# Only column 'A' is rewritten: it becomes the element-wise sum of A and B.
# Relies on the (already doubled) `df` from the previous cell.
for col_name, col_data in df.items():
    if col_name == 'A':
        df[col_name] = col_data + df['B']
print(df)


    A   B
0  10   8
1  14  10
2  18  12


**14. nunique()**

a. Find the number of unique values in each column of a DataFrame:

In [None]:
# nunique() counts the distinct values in each column.
df = pd.DataFrame(
    data={
        'A': [1, 1, 2, 3],
        'B': [4, 5, 5, 6],
    },
)
print(df.nunique())


A    3
B    3
dtype: int64


b. Count unique values for each row:

In [None]:
# axis='columns' (the same as axis=1) counts the distinct values within each row.
print(df.nunique(axis='columns'))


0    2
1    2
2    2
3    2
dtype: int64


c. Find unique values in a DataFrame with missing values:

In [None]:
# dropna=False makes the missing entry count as a distinct value of its own.
df = pd.DataFrame(
    data={
        'A': [1, 2, None],
        'B': [4, 5, 6],
    },
)
print(df.nunique(dropna=False))


A    3
B    3
dtype: int64


**15. sample()**

a. Select a random sample of 3 rows from a DataFrame:

In [None]:
# Ten rows where B is always A + 10; draw three of them at random.
df = pd.DataFrame(
    data={
        'A': range(10),
        'B': range(10, 20),
    },
)
sampled_df = df.sample(n=3)
print(sampled_df)


   A   B
7  7  17
9  9  19
6  6  16


b. Randomly sample 50% of the rows:

In [58]:
# frac=0.5 requests a random half of the rows instead of a fixed row count.
# Relies on whichever `df` is currently defined.
sampled_df = df.sample(frac=0.5)
print(sampled_df)


   A   B
2  2  12
8  8  18
4  4  14
9  9  19
1  1  11


c. Sample with replacement:

In [59]:
# replace=True lets the same row be drawn more than once across the five picks.
# Relies on whichever `df` is currently defined.
sampled_df = df.sample(n=5, replace=True)
print(sampled_df)


   A   B
6  6  16
3  3  13
6  6  16
0  0  10
8  8  18


**16. take()**

a. Select specific rows using their indices with take():

In [57]:
# take() selects rows by integer position; here the first and the third row.
df = pd.DataFrame(
    data={
        'A': [1, 2, 3],
        'B': [4, 5, 6],
    },
)
print(df.take(indices=[0, 2]))


   A  B
0  1  4
2  3  6


b. Use take() to reorder rows in a DataFrame:

In [60]:
# The order of the positions passed to take() is the order of the rows returned.
# Relies on the `df` from the previous cell.
print(df.take([2, 0, 1]))


   A  B
2  3  6
0  1  4
1  2  5


c. Take multiple elements from a Pandas Series:

In [61]:
# take() works on a Series too: pick the first and last elements by position.
series = pd.Series(data=[1, 2, 3, 4])
print(series.take(indices=[0, 3]))


0    1
3    4
dtype: int64


**17. groupby()**

a. Group by a column and find the mean of another column:

In [62]:
# Average the 'value' column separately for each label in 'group'.
df = pd.DataFrame(
    data={
        'group': ['A', 'B', 'A', 'B'],
        'value': [1, 2, 3, 4],
    },
)
grouped = df.groupby(by='group').mean()
print(grouped)


       value
group       
A        2.0
B        3.0


b. Group by multiple columns and calculate the sum:

In [63]:
# Grouping by two keys produces one result row per (A, B) combination.
df = pd.DataFrame(
    data={
        'A': ['foo', 'bar', 'foo'],
        'B': ['one', 'one', 'two'],
        'C': [1, 2, 3],
    },
)
grouped = df.groupby(by=['A', 'B']).sum()
print(grouped)


         C
A   B     
bar one  2
foo one  1
    two  3


c. Group by a column and use an aggregation function:

In [64]:
# agg() with a {column: [functions]} mapping applies each listed function to that
# column per group; the result columns are labelled by (column, function).
# Relies on the `df` (columns 'A', 'B', 'C') from the previous cell.
grouped = df.groupby('A').agg({'C': ['min', 'max', 'mean']})
print(grouped)


      C         
    min max mean
A               
bar   2   2  2.0
foo   1   3  2.0


**18. round()**

a. Round values in a DataFrame to 1 decimal place:


In [65]:
# Round every value in the frame to one decimal place.
df = pd.DataFrame(
    data={
        'A': [1.123, 2.456],
        'B': [3.789, 4.012],
    },
)
df_rounded = df.round(decimals=1)
print(df_rounded)


     A    B
0  1.1  3.8
1  2.5  4.0


b. Round values to different decimals for different columns:

In [66]:
# A {column: decimals} mapping rounds each column to its own precision.
# Relies on the `df` from the previous cell.
df_rounded = df.round({'A': 0, 'B': 2})
print(df_rounded)


     A     B
0  1.0  3.79
1  2.0  4.01


c. Round values in a Series:

In [67]:
# Series.round() works the same way: keep two decimal places.
series = pd.Series(data=[1.234, 5.678, 9.123])
series_rounded = series.round(decimals=2)
print(series_rounded)


0    1.23
1    5.68
2    9.12
dtype: float64
