In [3]:
import numpy as np

#### 1. How to import pandas and check the version?

In [4]:
import pandas as pd
print(pd.__version__)

2.0.3


#### 2. Create a pandas series from each of the items below: a list, numpy and a dictionary

Input

```
import numpy as np
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))
```

In [6]:
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))

# Create pandas Series
series_from_list = pd.Series(mylist)
series_from_numpy = pd.Series(myarr)
series_from_dict = pd.Series(mydict)

# Print the Series
print("Series from List:\n", series_from_list)
print("\nSeries from NumPy array:\n", series_from_numpy)
print("\nSeries from Dictionary:\n", series_from_dict)


Series from List:
 0     a
1     b
2     c
3     e
4     d
5     f
6     g
7     h
8     i
9     j
10    k
11    l
12    m
13    n
14    o
15    p
16    q
17    r
18    s
19    t
20    u
21    v
22    w
23    x
24    y
25    z
dtype: object

Series from NumPy array:
 0      0
1      1
2      2
3      3
4      4
5      5
6      6
7      7
8      8
9      9
10    10
11    11
12    12
13    13
14    14
15    15
16    16
17    17
18    18
19    19
20    20
21    21
22    22
23    23
24    24
25    25
dtype: int32

Series from Dictionary:
 a     0
b     1
c     2
e     3
d     4
f     5
g     6
h     7
i     8
j     9
k    10
l    11
m    12
n    13
o    14
p    15
q    16
r    17
s    18
t    19
u    20
v    21
w    22
x    23
y    24
z    25
dtype: int32


#### 3. Convert the series ser into a dataframe with its index as another column on the dataframe.

Input

```
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))
ser = pd.Series(mydict)
```

In [7]:
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))
ser = pd.Series(mydict)

# Convert Series to DataFrame with index as another column
df = ser.reset_index()

# Rename the columns
df.columns = ['index', 'value']

# Print the DataFrame
print(df)

   index  value
0      a      0
1      b      1
2      c      2
3      e      3
4      d      4
5      f      5
6      g      6
7      h      7
8      i      8
9      j      9
10     k     10
11     l     11
12     m     12
13     n     13
14     o     14
15     p     15
16     q     16
17     r     17
18     s     18
19     t     19
20     u     20
21     v     21
22     w     22
23     x     23
24     y     24
25     z     25


#### 4. Combine ser1 and ser2 to form a dataframe.

Input:

```
import numpy as np
ser1 = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))
ser2 = pd.Series(np.arange(26))
```

In [8]:

ser1 = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))
ser2 = pd.Series(np.arange(26))

# Combine ser1 and ser2 into a DataFrame
df = pd.DataFrame({'col1': ser1, 'col2': ser2})

# Print the DataFrame
print(df)

   col1  col2
0     a     0
1     b     1
2     c     2
3     e     3
4     d     4
5     f     5
6     g     6
7     h     7
8     i     8
9     j     9
10    k    10
11    l    11
12    m    12
13    n    13
14    o    14
15    p    15
16    q    16
17    r    17
18    s    18
19    t    19
20    u    20
21    v    21
22    w    22
23    x    23
24    y    24
25    z    25


####  5. Get all items of ser1 and ser2 not common to both.

Input

```
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])
```

In [9]:
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

# Get items not common to both
not_common_items = pd.concat([ser1[~ser1.isin(ser2)], ser2[~ser2.isin(ser1)]])

# Print the result
print(not_common_items)

0    1
1    2
2    3
2    6
3    7
4    8
dtype: int64


#### 6. Compute the minimum, 25th percentile, median, 75th, and maximum of ser.

Input

```
ser = pd.Series(np.random.normal(10, 5, 25))

```

In [10]:
ser = pd.Series(np.random.normal(10, 5, 25))

# Compute summary statistics
summary_stats = ser.describe(percentiles=[.25, .5, .75])

# Extract specific percentiles
min_value = summary_stats['min']
percentile_25 = summary_stats['25%']
median = summary_stats['50%']
percentile_75 = summary_stats['75%']
max_value = summary_stats['max']

# Print the results
print(f"Minimum: {min_value}")
print(f"25th Percentile: {percentile_25}")
print(f"Median: {median}")
print(f"75th Percentile: {percentile_75}")
print(f"Maximum: {max_value}")

Minimum: 0.7774296910231442
25th Percentile: 4.941856190447909
Median: 9.013094469163475
75th Percentile: 12.927164540814102
Maximum: 16.792331059917384


#### Problem 7. Calculte the frequency counts of each unique value ser.

Input

```
ser = pd.Series(np.take(list('abcdefgh'), np.random.randint(8, size=30)))
```

In [11]:
ser = pd.Series(np.take(list('abcdefgh'), np.random.randint(8, size=30)))

# Calculate frequency counts
value_counts = ser.value_counts()

# Print the result
print(value_counts)

a    6
c    5
b    5
e    5
g    4
d    2
f    2
h    1
Name: count, dtype: int64


#### 8. From ser, keep the top 2 most frequent items as it is and replace everything else as ‘Other’.

Input

```

np.random.RandomState(100)
ser = pd.Series(np.random.randint(1, 5, [12]))
```

In [12]:
np.random.seed(100)
ser = pd.Series(np.random.randint(1, 5, [12]))

# Find the top 2 most frequent items
top_items = ser.value_counts().nlargest(2).index

# Replace everything else as 'Other'
ser = ser.apply(lambda x: x if x in top_items else 'Other')

# Print the result
print(ser)

0         1
1         1
2         4
3         4
4         4
5         4
6         1
7     Other
8     Other
9         1
10    Other
11    Other
dtype: object


#### 9. Bin the series ser into 10 equal deciles and replace the values with the bin name.

Input
```
ser = pd.Series(np.random.random(20))
```
Desired Output

```
# First 5 items
0    7th
1    9th
2    7th
3    3rd
4    8th
dtype: category
Categories (10, object): [1st < 2nd < 3rd < 4th ... 7th < 8th < 9th < 10th]
```

In [13]:
np.random.seed(42)
ser = pd.Series(np.random.random(20))

# Bin the series into 10 equal deciles
bins = pd.qcut(ser, q=10, labels=[f'{i}th' for i in range(1, 11)])

# Print the result for the first 5 items
print(bins.head())

0     5th
1    10th
2     8th
3     7th
4     2th
dtype: category
Categories (10, object): ['1th' < '2th' < '3th' < '4th' ... '7th' < '8th' < '9th' < '10th']


#### 10. Reshape the series ser into a dataframe with 7 rows and 5 columns

Input

```
ser = pd.Series(np.random.randint(1, 10, 35))
```

In [14]:
np.random.seed(42)
ser = pd.Series(np.random.randint(1, 10, 35))

# Reshape the series into a DataFrame with 7 rows and 5 columns
df = pd.DataFrame(ser.values.reshape(7, 5))

# Print the result
print(df)

   0  1  2  3  4
0  7  4  8  5  7
1  3  7  8  5  4
2  8  8  3  6  5
3  2  8  6  2  5
4  1  6  9  1  3
5  7  4  9  3  5
6  3  7  5  9  7
