In [None]:
import pandas as pd
import numpy as np
# Draw from a locally seeded generator so the sample -- and therefore the
# filtered result -- is identical on every Restart & Run All.
SEED = 42
rng = np.random.default_rng(SEED)

# Create dictionary of arrays: ten integers and ten floats, one column each
data = {
    'Integers': rng.integers(10, 101, 10),   # upper bound is exclusive, so values span 10..100
    'Floats': rng.random(10),                # uniform floats in [0, 1)
}
# Create DataFrame
df = pd.DataFrame(data)
print("Original DataFrame:\n", df)

# Extract rows where the integer is divisible by 5 AND the float is > 0.5.
# Each comparison is parenthesised because `&` binds tighter than `==` / `>`;
# the combined boolean mask may legitimately select zero rows.
filtered_df = df[(df['Integers'] % 5 == 0) & (df['Floats'] > 0.5)]
print("\nFiltered DataFrame:\n",filtered_df)

Original DataFrame:
    Integers    Floats
0        34  0.062282
1        31  0.290198
2        12  0.637016
3        43  0.951795
4        59  0.963884
5        69  0.216259
6        47  0.901084
7        33  0.056119
8        11  0.165506
9        84  0.999362

Filtered DataFrame:
 Empty DataFrame
Columns: [Integers, Floats]
Index: []


In [None]:
import pandas as pd
import numpy as np
# Sample frame with gaps: A and B are numeric, C holds labels.
data = dict(
    A=[1, 2, np.nan, 4, 5],
    B=[5, np.nan, np.nan, 8, 10],
    C=['a', 'b', np.nan, 'd', 'e'],
)
df = pd.DataFrame(data)
print("Original DataFrame:\n", df)

# Per-column replacements, computed from the observed (non-missing) values:
#   (i)  column A -> its mean
#   (ii) column B -> its median
replacements = {'A': df['A'].mean(), 'B': df['B'].median()}

# Fill A and B first, then (iii) discard every row still missing a value in C.
# Columns absent from the mapping (here C) are left untouched by fillna.
df = df.fillna(value=replacements).dropna(subset=['C'])
print("\nDataFrame after handling missing values:\n",df)

Original DataFrame:
      A     B    C
0  1.0   5.0    a
1  2.0   NaN    b
2  NaN   NaN  NaN
3  4.0   8.0    d
4  5.0  10.0    e

DataFrame after handling missing values:
      A     B  C
0  1.0   5.0  a
1  2.0   8.0  b
3  4.0   8.0  d
4  5.0  10.0  e


In [None]:
import pandas as pd
# Sales records: one row per region, with the product sold there.
data = dict(
    Region=['North', 'South', 'East', 'West'],
    Product=['A', 'A', 'B', 'C'],
    Sales=[200, 150, 400, 300],
)
df = pd.DataFrame(data)
print("Original DataFrame:\n", df)

# Collapse the regions: one row per product holding the sum of its sales.
pivot_df = df.pivot_table(values='Sales', index='Product', aggfunc='sum')
print("\nPivot Table - Total Sales per Product:\n", pivot_df)

# Express each product's sales as a percentage of the grand total.
grand_total = pivot_df['Sales'].sum()
pivot_df['% of Total Sales'] = pivot_df['Sales'] / grand_total * 100
print("\nPivot Table with Percentage of Total Sales:\n",pivot_df)

Original DataFrame:
   Region Product  Sales
0  North       A    200
1  South       A    150
2   East       B    400
3   West       C    300

Pivot Table - Total Sales per Product:
          Sales
Product       
A          350
B          400
C          300

Pivot Table with Percentage of Total Sales:
          Sales  % of Total Sales
Product                         
A          350         33.333333
B          400         38.095238
C          300         28.571429


In [None]:
import pandas as pd
import numpy as np

# Draw from a locally seeded generator so the numbers, their mean, and the
# resulting labels are reproducible across kernel restarts.
SEED = 42
rng = np.random.default_rng(SEED)

# Create a DataFrame with 10 random integers from 1 to 100 inclusive.
# The upper bound passed to `integers` is exclusive, hence 101.
df = pd.DataFrame({
    'Numbers': rng.integers(1, 101, 10)
})
# Calculate mean
mean_val = df['Numbers'].mean()


# Define custom function
def high_or_low(x):
    """Label a value relative to the mean of the 'Numbers' column.

    Reads the module-level ``mean_val`` at call time, so it must be
    defined before this function is called.

    Parameters
    ----------
    x : number
        Value to classify.

    Returns
    -------
    str
        "High" if ``x`` is strictly greater than ``mean_val``, otherwise
        "Low" (a value exactly equal to the mean is "Low").
    """
    return "High" if x > mean_val else "Low"


# Apply function and add new column
df['Result'] = df['Numbers'].apply(high_or_low)

print("Mean of column:", mean_val)
print(df)

Mean of column: 55.3
   Numbers Result
0       82   High
1       93   High
2       68   High
3       40    Low
4       56   High
5       79   High
6       84   High
7        4    Low
8       26    Low
9       21    Low


In [None]:
import pandas as pd
import numpy as np
# Draw from a locally seeded generator so the series, its percentile, and the
# derived results are reproducible across kernel restarts.
SEED = 42
rng = np.random.default_rng(SEED)

# Create a Pandas Series of 15 random integers from 1 to 100 inclusive.
# The upper bound passed to `integers` is exclusive, hence 101.
s = pd.Series(rng.integers(1, 101, 15))
print("Original Series:\n", s)

# Step 1: Find 75th percentile
percentile_75 = s.quantile(0.75)
# Step 2: Replace values based on condition -- strictly above the 75th
# percentile is "High"; everything else (including ties) is "Low"
new_series = s.apply(lambda x: "High" if x > percentile_75 else "Low")
print("\nNew Series (High/Low based on 75th percentile):\n", new_series)
# Step 3: Compute cumulative sum of the original numeric values (not the labels)
cumsum_series = s.cumsum()

print("\nCumulative Sum of Original Series:\n",cumsum_series)

Original Series:
 0     49
1     68
2     81
3     38
4      1
5     63
6     15
7     40
8     83
9     49
10    57
11    98
12    86
13    66
14     6
dtype: int64

New Series (High/Low based on 75th percentile):
 0      Low
1      Low
2     High
3      Low
4      Low
5      Low
6      Low
7      Low
8     High
9      Low
10     Low
11    High
12    High
13     Low
14     Low
dtype: object

Cumulative Sum of Original Series:
 0      49
1     117
2     198
3     236
4     237
5     300
6     315
7     355
8     438
9     487
10    544
11    642
12    728
13    794
14    800
dtype: int64
