# Example 1: Convert a Numpy Array to a Pandas DataFrame

In [18]:
import numpy as np
import pandas as pd

# 1. Create a Numpy array (5 rows, 3 columns)
np_data = np.random.randint(1, 100, size=(5, 3)) # Random integers between 1 and 99

# 2. Convert Numpy array to a Pandas DataFrame
df = pd.DataFrame(np_data, columns = ['A','B','C'])

# 3. Display the DataFrame
print("Convert Numpy array to Pandas DataFrame:")
print(df)

Convert Numpy array to Pandas DataFrame:
    A   B   C
0  60  89  92
1  54  49  52
2  89  97  34
3  10  27  34
4  41  41  96


- np.random.randint(1, 100, size=(5,3)) : Creates a 5x3 array with random integers from 1 to 99.
- pd.DataFrame(np_data, columns = ['A','B','C']) : Converts the NumPy array into Pandas DataFrame.

# Example 2: Data Summary & Basic Statistics

In [19]:
print("\n Data summary:")
print(df.describe())

print("\n Column-wise mean:")
print(df.mean())

print("\n Max value in column 'A'")
print(df['A'].max())


 Data summary:
              A          B          C
count   5.00000   5.000000   5.000000
mean   50.80000  60.600000  61.600000
std    28.78715  30.737599  30.509015
min    10.00000  27.000000  34.000000
25%    41.00000  41.000000  34.000000
50%    54.00000  49.000000  52.000000
75%    60.00000  89.000000  92.000000
max    89.00000  97.000000  96.000000

 Column-wise mean:
A    50.8
B    60.6
C    61.6
dtype: float64

 Max value in column 'A'
89


# Example 3: Handling Missing Values(NaN)

- .loc[2, 'B'] = np.nan: Introduces a missing value(NaN)
- .isnull().sum(): Checks the number of missing values per column.
- .fillna(df['B'].mean(), inplace=True): Replaces missing values with the column's mean

- fillna is filling NaN.

In [None]:
# Introduce missing values
df.loc[2, 'B'] = np.nan

# Check for missing values.
print("\n Missing value check:")
print(df.isnull().sum())

# Fill missing values with column mean
df['B'].fillna(df['B'].mean(), inplace=True)

#Display DataFrame after filling missing values.
print("\n Data after filling missing values:")
print(df)


 Missing value check:
A    0
B    1
C    0
dtype: int64

 Data after filling missing values:
    A     B   C
0  60  89.0  92
1  54  49.0  52
2  89  51.5  34
3  10  27.0  34
4  41  41.0  96


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['B'].fillna(df['B'].mean(), inplace=True)
