In [1]:
import pandas as pd
import numpy as np

In [2]:
# 1. Create a one-dimensional array-like object containing an array of data.
s1 = pd.Series([1, 2, 3, 4, 5])
print(s1)

0    1
1    2
2    3
3    4
4    5
dtype: int64


In [3]:
# 2. Convert a Pandas Series to Python list and its type.
print(s1.tolist(), type(s1.tolist()))

[1, 2, 3, 4, 5] <class 'list'>


In [4]:
# 3. Add, subtract, multiply, and divide two Pandas Series.
s2 = pd.Series([2, 4, 6, 8, 10])
s3 = pd.Series([1, 3, 5, 7, 9])
print(s2 + s3, s2 - s3, s2 * s3, s2 / s3)

0     3
1     7
2    11
3    15
4    19
dtype: int64 0    1
1    1
2    1
3    1
4    1
dtype: int64 0     2
1    12
2    30
3    56
4    90
dtype: int64 0    2.000000
1    1.333333
2    1.200000
3    1.142857
4    1.111111
dtype: float64


In [5]:
# 4. Compare the elements of two Pandas Series.
s4 = pd.Series([2, 4, 6, 8, 10])
s5 = pd.Series([1, 3, 5, 7, 10])
print(s4 == s5, s4 > s5, s4 < s5)

0    False
1    False
2    False
3    False
4     True
dtype: bool 0     True
1     True
2     True
3     True
4    False
dtype: bool 0    False
1    False
2    False
3    False
4    False
dtype: bool


In [6]:
# 5. Convert a dictionary to a Pandas series.
d = {'a': 100, 'b': 200, 'c': 300, 'd': 400, 'e': 800}
s6 = pd.Series(d)
print(s6)

a    100
b    200
c    300
d    400
e    800
dtype: int64


In [7]:
# 6. Convert a NumPy array to a Pandas series.
arr = np.array([10, 20, 30, 40, 50])
s7 = pd.Series(arr)
print(s7)

0    10
1    20
2    30
3    40
4    50
dtype: int64


In [8]:
# 7. Change the data type of a given Series.
s8 = pd.Series([100, 200, 'python', 300.12, 400])
s8_numeric = pd.to_numeric(s8, errors='coerce')
print(s8_numeric)

0    100.00
1    200.00
2       NaN
3    300.12
4    400.00
dtype: float64


In [9]:
# 8. Convert the first column of a DataFrame as a Series.
df = pd.DataFrame({
    'col1': [1, 2, 3, 4, 7, 11],
    'col2': [4, 5, 6, 9, 5, 0],
    'col3': [7, 5, 8, 12, 1, 11]
})
s9 = df['col1']
print(s9, type(s9))

0     1
1     2
2     3
3     4
4     7
5    11
Name: col1, dtype: int64 <class 'pandas.core.series.Series'>


In [10]:
# 9. Convert a given Series to an array.
s10 = pd.Series([100, 200, 'python', 300.12, 400])
arr2 = s10.to_numpy()
print(arr2, type(arr2))

[100 200 'python' 300.12 400] <class 'numpy.ndarray'>


In [11]:
# 10. Convert Series of lists to one Series.
s11 = pd.Series([['Red', 'Green', 'White'], ['Red', 'Black'], ['Yellow']])
s11_flat = pd.Series([i for item in s11 for i in item])
print(s11_flat)

0       Red
1     Green
2     White
3       Red
4     Black
5    Yellow
dtype: object


In [12]:
# 11. Sort a given Series.
s12 = pd.Series([100, 200, 'python', 300.12, 400])

# Filter out non-numeric values for sorting
s12_numeric = s12[pd.to_numeric(s12, errors='coerce').notna()]
sorted_s12 = s12_numeric.sort_values()
print(sorted_s12)

s12 = pd.Series([100, 200, 'python', 300.12, 400])

# Convert everything to string before sorting
sorted_s12_as_string = s12.astype(str).sort_values()
print(sorted_s12_as_string)


0       100
1       200
3    300.12
4       400
dtype: object
0       100
1       200
3    300.12
4       400
2    python
dtype: object


In [13]:
# 12. Add some data to an existing Series.
# Create the original Series
s13 = pd.Series([100, 200, 'python', 300.12, 400])

# Use pd.concat() to add new elements
s13_extended = pd.concat([s13, pd.Series([500, 'php'])])

print(s13_extended)

0       100
1       200
2    python
3    300.12
4       400
0       500
1       php
dtype: object


In [14]:
# 13. Create a subset of a Series based on value and condition.
s14 = pd.Series(range(11))
subset = s14[s14 <= 5]
print(subset)

0    0
1    1
2    2
3    3
4    4
5    5
dtype: int64


In [15]:
# 14. Change the order of index of a given series.
s15 = pd.Series([1, 2, 3, 4, 5], index=['A', 'B', 'C', 'D', 'E'])
s15_reordered = s15.reindex(['B', 'A', 'C', 'D', 'E'])
print(s15_reordered)

B    2
A    1
C    3
D    4
E    5
dtype: int64


In [16]:
# 15. Create the mean and standard deviation of the data of a given Series.
s16 = pd.Series([1, 2, 3, 8, 9, 5, 3])
print(s16.mean(), s16.std())

4.428571428571429 3.0472470011002204


In [17]:
# 16. Get items of a given series not present in another series.
s17 = pd.Series([1, 2, 3, 4, 5])
s18 = pd.Series([2, 4, 6, 8, 10])
print(s17[~s17.isin(s18)])

0    1
2    3
4    5
dtype: int64


In [18]:
# 17. Get items which are not common between two series.
print(pd.concat([s17[~s17.isin(s18)], s18[~s18.isin(s17)]]))

0     1
2     3
4     5
2     6
3     8
4    10
dtype: int64


In [19]:
# 18. Compute the minimum, 25th percentile, median, 75th percentile, and maximum of a given series.
s19 = pd.Series(np.random.uniform(1, 15, 20))
print(np.percentile(s19, [0, 25, 50, 75, 100]))

[ 1.21543893  5.08675975  7.46217926  9.63135478 13.46019456]


In [20]:
# 19. Frequency counts of each unique value of a given series.
s20 = pd.Series([1, 7, 1, 6, 0, 4, 8, 0])
print(s20.value_counts())

1    2
0    2
7    1
6    1
4    1
8    1
Name: count, dtype: int64


In [21]:
# 20. Display most frequent value and replace everything else as 'Other'.
s21 = pd.Series([3, 1, 1, 3, 2, 3, 2, 3])
top_freq = s21.value_counts().index[0]
s21_replaced = s21.apply(lambda x: x if x == top_freq else 'Other')
print(s21_replaced)

0        3
1    Other
2    Other
3        3
4    Other
5        3
6    Other
7        3
dtype: object


In [22]:
# 21. Find the positions of numbers that are multiples of 5.
s22 = pd.Series([1, 9, 8, 6, 9, 7, 1, 1, 1])
print(s22[s22 % 5 == 0].index.tolist())

[]


In [23]:
# 22. Extract items at given positions of a given series.
s23 = pd.Series([2, 3, 9, 0, 2, 3, 9, 0, 2, 3])
positions = [0, 2, 6]
print(s23.iloc[positions])

0    2
2    9
6    9
dtype: int64


In [24]:
# 23. Get the positions of items of a given series in another given series.
s24 = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
s25 = pd.Series([1, 3, 5, 7, 10])
print([s24[s24 == i].index[0] for i in s25])

[np.int64(0), np.int64(2), np.int64(4), np.int64(6), np.int64(9)]


In [25]:
# 24. Convert the first and last character of each word to upper case.
s26 = pd.Series(['php', 'python', 'java', 'c#'])
s26_modified = s26.apply(lambda x: x[0].upper() + x[1:-1] + x[-1].upper())
print(s26_modified)

0       PhP
1    PythoN
2      JavA
3        C#
dtype: object


In [26]:
# 25. Calculate the number of characters in each word in a given series.
s27 = pd.Series(['Php', 'Python', 'Java', 'C#'])
print(s27.apply(len))

0    3
1    6
2    4
3    2
dtype: int64


In [27]:
# 26. Compute the difference of differences between consecutive numbers of a given series.
s28 = pd.Series([1, 3, 5, 8, 10, 11, 15])
diff_1 = s28.diff().tolist()
diff_2 = s28.diff().diff().tolist()
print(diff_1, diff_2)

[nan, 2.0, 2.0, 3.0, 2.0, 1.0, 4.0] [nan, nan, 0.0, 1.0, -1.0, -1.0, 3.0]


In [28]:
# 27. Convert a series of date strings to a timeseries.
# Create the series of date strings
s29 = pd.Series(['01 Jan 2015', '10-02-2016', '20180307', '2014/05/06', '2016-04-12', '2019-04-06T11:20'])

# Initialize an empty list to hold the parsed dates
parsed_dates = []

# Loop through the series and try to parse each date string
for date_str in s29:
    try:
        # Try different formats sequentially
        date = pd.to_datetime(date_str, format='%d %b %Y', errors='raise')  # 01 Jan 2015
    except ValueError:
        try:
            date = pd.to_datetime(date_str, format='%d-%m-%Y', errors='raise')  # 10-02-2016
        except ValueError:
            try:
                date = pd.to_datetime(date_str, format='%Y%m%d', errors='raise')  # 20180307
            except ValueError:
                try:
                    date = pd.to_datetime(date_str, format='%Y/%m/%d', errors='raise')  # 2014/05/06
                except ValueError:
                    try:
                        date = pd.to_datetime(date_str, format='%Y-%m-%dT%H:%M', errors='raise')  # 2019-04-06T11:20
                    except ValueError:
                        date = pd.NaT  # If all formats fail, assign NaT

    parsed_dates.append(date)

# Convert the list of parsed dates to a Series
s30 = pd.Series(parsed_dates)

# Output the results
print("Parsed Dates:")
print(s30)

# Extract the required date components
print("\nDay of month:", s30.dt.day)
print("Day of year:", s30.dt.dayofyear)
print("Week number:", s30.dt.isocalendar().week)
print("Day of week:", s30.dt.day_name())

Parsed Dates:
0   2015-01-01 00:00:00
1   2016-02-10 00:00:00
2   2018-03-07 00:00:00
3   2014-05-06 00:00:00
4                   NaT
5   2019-04-06 11:20:00
dtype: datetime64[ns]

Day of month: 0     1.0
1    10.0
2     7.0
3     6.0
4     NaN
5     6.0
dtype: float64
Day of year: 0      1.0
1     41.0
2     66.0
3    126.0
4      NaN
5     96.0
dtype: float64
Week number: 0       1
1       6
2      10
3      19
4    <NA>
5      14
Name: week, dtype: UInt32
Day of week: 0     Thursday
1    Wednesday
2    Wednesday
3      Tuesday
4          NaN
5     Saturday
dtype: object


In [29]:
# 28. Get day of month, day of year, week number and day of week from a given series of date strings.


# Original series of date strings
s30 = pd.Series(['01 Jan 2015', '10-02-2016', '20180307', '2014/05/06', '2016-04-12', '2019-04-06T11:20'])

# Manually parse the series by trying different date formats
s30_parsed = pd.to_datetime(s30, errors='coerce', format='%d %b %Y')  # First format
s30_parsed.fillna(pd.to_datetime(s30, errors='coerce', format='%d-%m-%Y'), inplace=True)  # Second format
s30_parsed.fillna(pd.to_datetime(s30, errors='coerce', format='%Y%m%d'), inplace=True)  # Third format
s30_parsed.fillna(pd.to_datetime(s30, errors='coerce', format='%Y/%m/%d'), inplace=True)  # Fourth format
s30_parsed.fillna(pd.to_datetime(s30, errors='coerce', format='%Y-%m-%dT%H:%M'), inplace=True)  # Fifth format

# Extract the required date components
print("Day of month:", s30_parsed.dt.day)
print("Day of year:", s30_parsed.dt.dayofyear)
print("Week number:", s30_parsed.dt.isocalendar().week)
print("Day of week:", s30_parsed.dt.day_name())

Day of month: 0     1.0
1    10.0
2     7.0
3     6.0
4     NaN
5     6.0
dtype: float64
Day of year: 0      1.0
1     41.0
2     66.0
3    126.0
4      NaN
5     96.0
dtype: float64
Week number: 0       1
1       6
2      10
3      19
4    <NA>
5      14
Name: week, dtype: UInt32
Day of week: 0     Thursday
1    Wednesday
2    Wednesday
3      Tuesday
4          NaN
5     Saturday
dtype: object


In [30]:
# 29. Convert year-month string to dates adding a specified day of the month.
s31 = pd.Series(['Jan 2015', 'Feb 2016', 'Mar 2017', 'Apr 2018', 'May 2019'])
print(pd.to_datetime(s31) + pd.offsets.Day(11))

0   2015-01-12
1   2016-02-12
2   2017-03-12
3   2018-04-12
4   2019-05-12
dtype: datetime64[ns]


  print(pd.to_datetime(s31) + pd.offsets.Day(11))


In [31]:
# 30. Filter words from a given series that contain at least two vowels.
s32 = pd.Series(['Red', 'Green', 'Orange', 'Pink', 'Yellow', 'White'])
vowel_count = s32.apply(lambda x: sum([1 for ch in x.lower() if ch in 'aeiou']))
print(s32[vowel_count >= 2])

1     Green
2    Orange
4    Yellow
5     White
dtype: object


In [32]:
# 31. Compute the Euclidean distance between two given series.
s33 = pd.Series(range(1, 11))
s34 = pd.Series([11, 8, 7, 5, 6, 5, 3, 4, 7, 1])
print(np.sqrt(np.sum((s33 - s34) ** 2)))

16.492422502470642


In [33]:
# 32. Find positions of values neighboured by smaller values on both sides in a given series.
s35 = pd.Series([1, 8, 7, 5, 6, 5, 10, 5, 7, 1])
print(np.where((s35 > s35.shift(1)) & (s35 > s35.shift(-1)))[0])

[1 4 6 8]


In [34]:
# 33. Replace missing white spaces in a given string with the least frequent character.
s36 = pd.Series(list('dbc deb ab'))
char_freq = s36.value_counts()
least_freq_char = char_freq.idxmin()
s36_replaced = s36.replace(' ', least_freq_char)
print(''.join(s36_replaced))

dbccdebcab


In [35]:
# 34. Compute the autocorrelations of a given series.
s37 = pd.Series([10, 25, 8, 60, 45, 31, 40])
print([s37.autocorr(lag=i) for i in range(1, 4)])

[np.float64(0.016560738776985582), np.float64(0.08858219635796921), np.float64(-0.18147522179999118)]


In [36]:
# 35. Count the number of consecutive occurrences of a given value in a series.
s38 = pd.Series([2, 2, 3, 3, 3, 2, 4, 4, 4, 4])
consecutive_counts = (s38 != s38.shift()).cumsum()  # Groups of consecutive values
count_consecutive = s38.groupby(consecutive_counts).transform('size')
print(count_consecutive)

0    2
1    2
2    3
3    3
4    3
5    1
6    4
7    4
8    4
9    4
dtype: int64


In [37]:
# 36. Find the positions of the first occurrence of the smallest and largest value of a series.
s39 = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
min_pos = s39.idxmin()
max_pos = s39.idxmax()
print(min_pos, max_pos)

0 9


In [38]:
# 37. Replace missing values in a given series using a specified method.
s40 = pd.Series([1, 2, np.nan, 4, np.nan, 6, 7])
filled_s40 = s40.fillna(method='ffill')
print(filled_s40)

0    1.0
1    2.0
2    2.0
3    4.0
4    4.0
5    6.0
6    7.0
dtype: float64


  filled_s40 = s40.fillna(method='ffill')


In [39]:
# 38. Find the index of the first occurrence of the smallest value greater than a given number.
s41 = pd.Series([10, 20, 30, 40, 50, 60, 70, 80, 90])
threshold = 35
first_idx = s41[s41 > threshold].idxmin()
print(first_idx)

3


In [40]:
# 39. Stack two given series vertically and horizontally.
s42 = pd.Series([1, 2, 3, 4])
s43 = pd.Series([5, 6, 7, 8])
stacked_vertically = pd.concat([s42, s43], axis=0).reset_index(drop=True)
stacked_horizontally = pd.concat([s42, s43], axis=1).reset_index(drop=True)
print(stacked_vertically, stacked_horizontally)

0    1
1    2
2    3
3    4
4    5
5    6
6    7
7    8
dtype: int64    0  1
0  1  5
1  2  6
2  3  7
3  4  8


In [41]:
# 40. Get the positions of values of a series that are multiples of 10.
s44 = pd.Series([10, 20, 33, 46, 50, 55, 60])
positions_multiples_10 = s44[s44 % 10 == 0].index.tolist()
print(positions_multiples_10)

[0, 1, 4, 6]


In [42]:
# 41. Change the order of the columns of a DataFrame.
df2 = pd.DataFrame({
    'col1': [1, 2, 3, 4],
    'col2': [5, 6, 7, 8],
    'col3': [9, 10, 11, 12]
})
df2_reordered = df2[['col3', 'col1', 'col2']]  # Reordering columns
print(df2_reordered)

   col3  col1  col2
0     9     1     5
1    10     2     6
2    11     3     7
3    12     4     8
