Q1. Create pandas Series from list, numpy array, and dictionary

In [None]:
# Step 1: Import pandas and numpy
import pandas as pd         # pandas is used for handling Series and DataFrames
import numpy as np          # numpy is used for numerical operations


In [None]:
# Step 2: Build the three source containers (list, numpy array, dictionary)
mylist = [letter for letter in 'abcdefghijklmnopqrstuvwxyz']        # the 26 lowercase letters, one per element
myarr = np.arange(0, 26)                                            # integers 0 through 25
mydict = {letter: number for letter, number in zip(mylist, myarr)}  # maps each letter to its number

In [None]:
# Step 3: Wrap each source container in a pandas Series
# (list -> default integer index, array -> default integer index,
#  dictionary -> keys become the index)
ser_from_list, ser_from_array, ser_from_dict = (
    pd.Series(source) for source in (mylist, myarr, mydict)
)


In [None]:
# Step 4: Display the first rows of all three Series
# sep="\n" puts each Series on its own line. Embedding "\n" in the heading and
# relying on print's default separator inserts a stray space before the first
# row, which misaligns it against the rows below.
print("Series from list:", ser_from_list.head(), sep="\n")
print("\nSeries from array:", ser_from_array.head(), sep="\n")
print("\nSeries from dictionary:", ser_from_dict.head(), sep="\n")

Series from list:
 0    a
1    b
2    c
3    d
4    e
dtype: object

Series from array:
 0    0
1    1
2    2
3    3
4    4
dtype: int64

Series from dictionary:
 a    0
b    1
c    2
d    3
e    4
dtype: int64


Q2. Convert the Series ser into a DataFrame with index as column

In [None]:
# Step 1: Turn the Series into a two-column DataFrame.
# Naming the index axis first and passing name= to reset_index yields the
# 'letter' / 'number' columns directly, without a separate rename.
df = ser_from_dict.rename_axis('letter').reset_index(name='number')

# Step 2: Show the first rows of the DataFrame
print(df.head())

  letter  number
0      a       0
1      b       1
2      c       2
3      d       3
4      e       4


Q3. Combine ser1 and ser2 to form a DataFrame

In [None]:
# Step 1: Build the two Series to combine (letters and the numbers 0-25)
ser1 = pd.Series(list('abcdefghijklmnopqrstuvwxyz'))
ser2 = pd.Series(np.arange(26))

# Step 2 + 3: Place them side by side as named columns of one DataFrame
df = pd.DataFrame({'alphabets': ser1, 'numbers': ser2})

# Step 4: Show first few rows
print(df.head())


  alphabets  numbers
0         a        0
1         b        1
2         c        2
3         d        3
4         e        4


Q4. Give a name to the Series

In [None]:
# Step 1 + 2: Create the Series and give it its name in the constructor
ser = pd.Series(list('abcdefghijklmnopqrstuvwxyz'), name='alphabets')

# Step 3: Display the first rows (the name appears in the footer)
print(ser.head())

0    a
1    b
2    c
3    d
4    e
Name: alphabets, dtype: object


Q5. From ser1 remove items present in ser2

In [None]:
# Step 1: The Series to filter (ser1) and the values to drop from it (ser2)
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

# Step 2: Keep only the ser1 entries whose value does not occur in ser2
keep_mask = ~ser1.isin(ser2)
result = ser1.loc[keep_mask]

# Step 3: Display result (original index labels are preserved)
print(result)

0    1
1    2
2    3
dtype: int64


Q6. Get all items of ser1 and ser2 not common to both

In [None]:
# Uses ser1 and ser2 as defined in the Q5 cell.

# Step 1: All distinct values occurring in either Series
union = pd.Series(np.union1d(ser1, ser2))

# Step 2: Values occurring in both Series
intersection = pd.Series(np.intersect1d(ser1, ser2))

# Step 3: Drop the shared values from the union
shared_mask = union.isin(intersection)
not_common = union.loc[~shared_mask]

# Step 4: Show result
print(not_common)


0    1
1    2
2    3
5    6
6    7
7    8
dtype: int64


Q7. Compute min, 25th percentile, median, 75th percentile, and max

In [None]:
# Seed the global numpy generator so the sample, and therefore the statistics
# printed below, are the same on every run.
np.random.seed(42)
ser = pd.Series(np.random.normal(10, 5, 25))   # 25 draws from N(mean=10, std=5)

# Compute summary statistics
print("Minimum:", ser.min())
print("25th percentile:", ser.quantile(0.25))
print("Median:", ser.median())
print("75th percentile:", ser.quantile(0.75))
print("Maximum:", ser.max())

Minimum: -0.6007064502699144
25th percentile: 5.802431951085807
Median: 9.957656079159877
75th percentile: 14.06921765373826
Maximum: 22.333313583293357


Q8. Calculate frequency counts of each unique value

In [None]:
# Draw 30 random letters from 'a'..'h' by indexing a letter array with random positions
letters = np.array(list('abcdefgh'))
ser = pd.Series(letters[np.random.randint(8, size=30)])

# Frequency of each distinct letter, most frequent first
counts = ser.value_counts()
print(counts)


d    8
h    6
g    5
a    3
f    3
e    2
b    2
c    1
Name: count, dtype: int64


Q9. Keep top 2 frequent items and replace others with 'Other'

In [None]:
np.random.seed(100)                            # fixed seed for a reproducible sample
ser = pd.Series(np.random.randint(1, 5, 12))   # 12 integers drawn from 1..4

# value_counts sorts by frequency, so the first two index entries are the top 2 values
top2 = ser.value_counts().index[:2]

# Overwrite every entry that is not one of the top 2 values with 'Other'
is_top = ser.isin(top2)
ser = ser.mask(~is_top, 'Other')

print(ser)

0         1
1         1
2         4
3         4
4         4
5         4
6         1
7     Other
8     Other
9         1
10    Other
11    Other
dtype: object


In [None]:
# Q10. Bin the Series into 10 equal-sized bins (deciles)

In [None]:
# 20 uniform random numbers in [0, 1)
ser = pd.Series(np.random.random(size=20))

# One label per decile: Bin1 .. Bin10
labels = [f'Bin{i}' for i in range(1, 11)]

# Quantile-based binning into 10 bins, tagged with the labels above
binned = pd.qcut(ser, 10, labels=labels)

print(binned)

0      Bin7
1      Bin9
2      Bin1
3      Bin6
4      Bin9
5      Bin3
6      Bin3
7      Bin1
8      Bin4
9     Bin10
10     Bin7
11     Bin2
12     Bin8
13     Bin4
14     Bin6
15    Bin10
16     Bin8
17     Bin5
18     Bin2
19     Bin5
dtype: category
Categories (10, object): ['Bin1' < 'Bin2' < 'Bin3' < 'Bin4' ... 'Bin7' < 'Bin8' < 'Bin9' < 'Bin10']


Q11. Reshape Series into 7 rows × 5 columns

In [None]:
ser = pd.Series(np.random.randint(1, 10, 35))  # 35 random integers drawn from 1..9

# Reshape the 35 values row-wise into 7 rows x 5 columns and wrap the result in
# a DataFrame, so that `df` really holds a DataFrame rather than a bare ndarray.
df = pd.DataFrame(ser.values.reshape(7, 5))

print(df)

   0  1  2  3  4
0  1  3  4  3  6
1  9  2  1  8  7
2  3  1  9  3  6
3  2  9  2  6  5
4  3  9  4  6  1
5  4  7  4  5  8
6  7  4  1  5  5


Q12. Find positions of numbers that are multiples of 3

In [None]:
ser = pd.Series(np.random.randint(1, 10, 7))   # 7 random integers drawn from 1..9

# np.where on a boolean mask returns a tuple of index arrays; element 0 holds
# the positions where the value is divisible by 3.
positions = np.where(ser % 3 == 0)[0]

# sep="\n" puts the Series on its own line without the stray leading space that
# print's default separator would add after a "\n" in the heading.
print("Series:", ser, sep="\n")
print("Positions of multiples of 3:", positions)

Series:
 0    6
1    8
2    7
3    7
4    3
5    5
6    3
dtype: int64
Positions of multiples of 3: [0 4 6]


Q13. Extract items at given positions

In [None]:
ser = pd.Series(list('abcdefghijklmnopqrstuvwxyz'))
pos = [0, 4, 8, 14, 20]       # specific positions

# .iloc selects by integer position. Plain ser[pos] selects by index *label*,
# which only coincides with position while the index is the default 0..n-1.
picked = ser.iloc[pos]
print(picked)

0     a
4     e
8     i
14    o
20    u
dtype: object


Q14. Stack ser1 and ser2 vertically and horizontally

In [None]:
ser1 = pd.Series(range(5))
ser2 = pd.Series(list('abcde'))

# Vertical stack: ser2 is appended below ser1 (index labels are kept, so 0..4 repeats)
vertical = pd.concat([ser1, ser2], axis=0)

# Horizontal stack: the two Series become the columns of a DataFrame
horizontal = pd.concat([ser1, ser2], axis=1)
horizontal.columns = ['numbers', 'letters']

# sep="\n" starts each object on its own line. Embedding "\n" in the heading and
# using print's default separator adds a stray space that misaligns the first
# output line (most visibly the DataFrame header).
print("Vertical Stack:", vertical, sep="\n")
print("\nHorizontal Stack:", horizontal, sep="\n")

Vertical Stack:
 0    0
1    1
2    2
3    3
4    4
0    a
1    b
2    c
3    d
4    e
dtype: object

Horizontal Stack:
    numbers letters
0        0       a
1        1       b
2        2       c
3        3       d
4        4       e


Q15. Get the positions of ser2 items in ser1

In [None]:
ser1 = pd.Series([10, 9, 6, 5, 3, 1, 12, 8, 13])
ser2 = pd.Series([1, 3, 10, 13])

# For every item of ser2, find the position of its first occurrence in ser1.
# Working on the underlying array gives true positions (independent of ser1's
# index labels), and int() turns numpy integers into plain Python ints so the
# printed list reads [5, 4, 0, 8] instead of [np.int64(5), ...].
# As before, an item of ser2 that is missing from ser1 raises IndexError.
ser1_values = ser1.values
positions = [int(np.flatnonzero(ser1_values == x)[0]) for x in ser2]
print(positions)

[np.int64(5), np.int64(4), np.int64(0), np.int64(8)]


Q16. Compute Mean Squared Error (MSE)

In [None]:
# Ground truth 0..9, and predictions that overshoot each value by a random amount in [0, 1)
truth = pd.Series(range(10))
noise = np.random.random(10)
pred = pd.Series(range(10)) + noise

# Mean of the squared differences between truth and prediction
squared_error = (truth - pred) ** 2
mse = squared_error.mean()
print("Mean Squared Error:", mse)

Mean Squared Error: 0.30090213781983577


Q17. Capitalize first character of each word

In [None]:
ser = pd.Series(['how', 'to', 'kick', 'ass?'])

# Upper-case the first character of every element via the vectorised string accessor
capitalized = ser.str.capitalize()
print(capitalized)

0     How
1      To
2    Kick
3    Ass?
dtype: object


Q18. Count number of characters in each word

In [None]:
ser = pd.Series(['how', 'to', 'kick', 'ass?'])

# Character count of every element via the vectorised string accessor
lengths = ser.str.len()
print(lengths)

0    3
1    2
2    4
3    4
dtype: int64


Q19. Convert date strings to timeseries

In [None]:
# Six date strings, each written in a different format
raw_dates = ['01 Jan 2010', '02-02-2011', '20120303',
             '2013/04/04', '2014-05-05', '2015-06-06T12:20']
ser = pd.Series(raw_dates)

# format='mixed' makes pandas infer the format of each element individually
dates = pd.to_datetime(ser, format='mixed')

print(dates)

0   2010-01-01 00:00:00
1   2011-02-02 00:00:00
2   2012-03-03 00:00:00
3   2013-04-04 00:00:00
4   2014-05-05 00:00:00
5   2015-06-06 12:20:00
dtype: datetime64[ns]


Q20. Get day, week, year, and weekday

In [None]:
# Uses `dates` as produced by the Q19 cell.

# Calendar components of every timestamp
day, year = dates.dt.day, dates.dt.year
week = dates.dt.isocalendar()['week']   # ISO week number
weekday = dates.dt.day_name()           # weekday as an English name

# Collect the components as columns of one table and show it
date_parts = pd.DataFrame({'day': day, 'week': week, 'year': year, 'weekday': weekday})
print(date_parts)

   day  week  year    weekday
0    1    53  2010     Friday
1    2     5  2011  Wednesday
2    3     9  2012   Saturday
3    4    14  2013   Thursday
4    5    19  2014     Monday
5    6    23  2015   Saturday


Q21. Change ser to dates that start with 4th of each month

In [None]:
ser = pd.Series(['Jan 2010', 'Feb 2011', 'Mar 2012'])

# Prefix every "Mon YYYY" string with day 04, then parse the result as dates
with_day = '04 ' + ser
dates = pd.to_datetime(with_day)

print(dates)

0   2010-01-04
1   2011-02-04
2   2012-03-04
dtype: datetime64[ns]


Q22. Extract words with at least 2 vowels

In [None]:
ser = pd.Series(['Apple', 'Orange', 'Plan', 'Python', 'Money'])

# Count the vowels (either case) in each word, then keep words with two or more
vowel_counts = ser.str.count(r'[aeiouAEIOU]')
filtered = ser.loc[vowel_counts >= 2]

print(filtered)

0     Apple
1    Orange
4     Money
dtype: object


Q23. Extract valid emails from text

In [None]:
emails = pd.Series(['buying books at amazom.com', 'rameses@egypt.com',
                    'matt@t.co', 'narendra@modi.com'])

# local-part @ domain . top-level domain of 2-4 letters.
# The dot before the TLD is escaped with a SINGLE backslash: inside a raw string
# a doubled backslash makes the regex look for a literal backslash followed by
# any character, so no address would ever match.
pattern = r'[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}'

# Keep only the entries that contain something shaped like an email address
valid = emails[emails.str.contains(pattern)]

print(valid)

Series([], dtype: object)


Q24. Compute mean weight for each fruit

In [None]:
# 10 randomly chosen labels and the weights 1.0, 2.0, ..., 10.0
fruit = pd.Series(np.random.choice(['apple', 'banana', 'carrot'], 10))
weights = pd.Series(np.linspace(1, 10, 10))

# Pair each label with its weight, then average the weights per label
df = pd.DataFrame({'fruit': fruit, 'weight': weights})
mean_weight = df.groupby('fruit')['weight'].mean()
print(mean_weight)


fruit
apple     3.500000
banana    4.600000
carrot    8.333333
Name: weight, dtype: float64


Q25. Compute Euclidean distance between Series p and q

In [None]:
p = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
q = pd.Series([10, 9, 8, 7, 6, 5, 4, 3, 2, 1])

# Euclidean distance: square root of the summed squared element-wise differences
squared_diff = (p - q) ** 2
distance = np.sqrt(squared_diff.sum())
print("Euclidean Distance:", distance)

Euclidean Distance: 18.16590212458495


Q26. Get positions of peaks (local maxima)

In [None]:
ser = pd.Series([2, 10, 3, 4, 9, 10, 2, 7, 3])

# A peak is strictly greater than both immediate neighbours. The first and last
# elements have a missing neighbour (NaN after shifting), so they never qualify.
above_previous = ser > ser.shift(1)
above_next = ser > ser.shift(-1)
peaks = ser.loc[above_previous & above_next]
print("Peak positions:", peaks.index.tolist())

Peak positions: [1, 5, 7]
