In [1]:
import pandas as pd
import numpy as np
import random
from dateutil.parser import parse
from collections import Counter

In [2]:
data = pd.Series([0.25, 0.5, 0.75, 1.0])
print(data)

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64


In [3]:
data = pd.Series([0.25, 0.5, 0.75, 1.0])
data_pd = data.tolist()
print(data_pd)
print(type(data_pd))

[0.25, 0.5, 0.75, 1.0]
<class 'list'>


In [4]:
s1 = pd.Series([2, 4, 6, 8, 10])
s2 = pd.Series([1, 3, 5, 7, 9])

print("Addition:\n", s1+s2)
print("\nSubtraction:\n", s1-s2)
print("\nMultiplication:\n", s1*s2)
print("\nDivision:\n", s1/s2)

Addition:
 0     3
1     7
2    11
3    15
4    19
dtype: int64

Subtraction:
 0    1
1    1
2    1
3    1
4    1
dtype: int64

Multiplication:
 0     2
1    12
2    30
3    56
4    90
dtype: int64

Division:
 0    2.000000
1    1.333333
2    1.200000
3    1.142857
4    1.111111
dtype: float64


In [5]:
s1 = pd.Series([2, 4, 6, 8, 10])
s2 = pd.Series([1, 3, 5, 7, 10])

print(s1 == s2)

0    False
1    False
2    False
3    False
4     True
dtype: bool


In [6]:
dictionary = {'a': 100, 'b': 200, 'c': 300, 'd': 400, 'e': 800}
series = pd.Series(dictionary)
print(series)

a    100
b    200
c    300
d    400
e    800
dtype: int64


In [7]:
arr = np.array([10, 20, 30, 40, 50])
series = pd.Series(arr)
print(series)

0    10
1    20
2    30
3    40
4    50
dtype: int32


In [8]:
series = pd.Series([100, 200, 'python', 300.12, 400])
print(series)

changed_series = pd.to_numeric(series, errors='coerce') 
print(changed_series)

0       100
1       200
2    python
3    300.12
4       400
dtype: object
0    100.00
1    200.00
2       NaN
3    300.12
4    400.00
dtype: float64


In [9]:
data = {'col1': [1, 2, 3, 4, 7, 11],
       'col2': [4, 5, 6, 9, 5, 0],
       'col3': [7, 5, 8, 12, 1, 11]}

df = pd.DataFrame(data)
print("Original DataFrame\n", df.to_string())

series = df.iloc[:,0]
print("\n1st column as a Series:\n",series.to_string())
print(f"\n<class '{type(series)}'>")

Original DataFrame
    col1  col2  col3
0     1     4     7
1     2     5     5
2     3     6     8
3     4     9    12
4     7     5     1
5    11     0    11

1st column as a Series:
 0     1
1     2
2     3
3     4
4     7
5    11

<class '<class 'pandas.core.series.Series'>'>


In [10]:
series = pd.Series([100, 200, "python", 300.12, 400])
print("Series:\n",series)
print(f"<class '{type(series)}'>")

arr = np.array(series)
print("\nArray:",arr)
print(f"<class '{type(arr)}'>")

Series:
 0       100
1       200
2    python
3    300.12
4       400
dtype: object
<class '<class 'pandas.core.series.Series'>'>

Array: [100 200 'python' 300.12 400]
<class '<class 'numpy.ndarray'>'>


In [11]:
series = pd.Series([['Red','Green','White'],['Red','Black'],['Yellow']])
print(series)

final_series = series.explode().reset_index(drop=True)
# series = data.apply(pd.Series).stack().reset_index(drop=True)
print(final_series)

0    [Red, Green, White]
1           [Red, Black]
2               [Yellow]
dtype: object
0       Red
1     Green
2     White
3       Red
4     Black
5    Yellow
dtype: object


In [12]:
series = pd.Series([100, 200, "python", 300.12, 400])

series_shuffled = series.sample(frac = 1).reset_index(drop = True)

print("Original Data Series:")
print(series)
print("\nShuffled Data Series:")
print(series_shuffled)

Original Data Series:
0       100
1       200
2    python
3    300.12
4       400
dtype: object

Shuffled Data Series:
0       100
1       400
2    python
3    300.12
4       200
dtype: object


In [13]:
series = pd.Series([100, 200, 'python', 300.12, 400])

# series = series.append(pd.Series([500]))
# series = series.append(pd.Series(['php']))
# We get warning and the index is not updated. Hence use concat

new_data = pd.Series([500, 'php'])
series = pd.concat([series,new_data])


print("Original Data Series:")
print(series.iloc[:-2])
print("\nData Series after adding some data:")
print(series)

Original Data Series:
0       100
1       200
2    python
3    300.12
4       400
dtype: object

Data Series after adding some data:
0       100
1       200
2    python
3    300.12
4       400
0       500
1       php
dtype: object


In [14]:
s = pd.Series(range(11))

subset = s[s <= 5]

print("Original Data Series:")
print(s)
print("\nSubset of the above Data Series:")
print(subset)

Original Data Series:
0      0
1      1
2      2
3      3
4      4
5      5
6      6
7      7
8      8
9      9
10    10
dtype: int64

Subset of the above Data Series:
0    0
1    1
2    2
3    3
4    4
5    5
dtype: int64


In [15]:
series = pd.Series([1, 2, 3, 4, 5], index = ['A', 'B', 'C', 'D', 'E'])

series_shuffled = series.reindex(['B', 'A', 'C', 'D', 'E'])

print("Original Data Series:")
print(series)
print("\nShuffled Data Series:")
print(series_shuffled)

Original Data Series:
A    1
B    2
C    3
D    4
E    5
dtype: int64

Shuffled Data Series:
B    2
A    1
C    3
D    4
E    5
dtype: int64


In [16]:
#series = pd.Series(range(11))
series = pd.Series([1,2,3,4,5,6,7,8,9,10])

mean = series.mean()
std_dev = series.std()

print("Series:\n",series)
print("Mean:",mean)
print("Standard Deviation:",std_dev)

Series:
 0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
dtype: int64
Mean: 5.5
Standard Deviation: 3.0276503540974917


In [17]:
s1 = pd.Series([1,2,3,4,5])
s2 = pd.Series([2,4,6,8,10])

not_in_s2 = s1[~s1.isin(s2)]

print("Two Series:")
print("Series 1:\n",s1)
print("Series 2:\n",s2)
print("Items Series 1 but not in series 2:\n",not_in_s2)

Two Series:
Series 1:
 0    1
1    2
2    3
3    4
4    5
dtype: int64
Series 2:
 0     2
1     4
2     6
3     8
4    10
dtype: int64
Items Series 1 but not in series 2:
 0    1
2    3
4    5
dtype: int64


In [18]:
s1 = pd.Series([1,2,3,4,5])
s2 = pd.Series([2,4,6,8,10])

#not_in_s1 = s2[~s2.isin(s1)]
not_common = pd.Series(np.union1d(s1[~s1.isin(s2)] , s2[~s2.isin(s1)]))
print("Two Series:")
print("Series 1:\n",s1)
print("Series 2:\n",s2)
print("Uncommon in series 2:\n",not_common)

Two Series:
Series 1:
 0    1
1    2
2    3
3    4
4    5
dtype: int64
Series 2:
 0     2
1     4
2     6
3     8
4    10
dtype: int64
Uncommon in series 2:
 0     1
1     3
2     5
3     6
4     8
5    10
dtype: int64


In [19]:
s_state = np.random.RandomState(100)

s = pd.Series(s_state.normal(10, 4, 20))


min_val = s.min()
q1 = s.quantile(q=0.25)
median = s.median()
q3 = s.quantile(q=0.75)
max_val = s.max()


print("Original Series:\n",s)
print("\nMinimum, 25th percentile, median, 75th, and maximum of a given series:")
print([min_val, q1, median, q3, max_val])

Original Series:
 0      3.000938
1     11.370722
2     14.612143
3      8.990256
4     13.925283
5     12.056875
6     10.884719
7      5.719827
8      9.242017
9     11.020006
10     8.167892
11    11.740654
12     7.665620
13    13.267388
14    12.690883
15     9.582355
16     7.874878
17    14.118931
18     8.247458
19     5.526727
dtype: float64

Minimum, 25th percentile, median, 75th, and maximum of a given series:
[3.0009381077812103, 8.094638666640218, 10.233537051661546, 12.215377329892807, 14.612143210254576]


In [20]:
s = pd.Series(np.random.randint(100, size=40))
value_counts = s.value_counts(sort=False)

print("Original Series:")
print(s)
print("\nFrequency of each unique value of the said series:")
print(value_counts)

Original Series:
0     21
1     17
2     41
3     82
4     59
5     17
6     45
7     50
8     57
9     28
10    36
11    22
12    63
13    18
14    39
15    38
16    79
17    59
18    41
19     1
20    35
21    30
22    23
23    90
24    16
25    54
26     5
27    89
28    45
29    95
30    70
31    59
32    34
33    74
34    67
35    81
36    84
37    46
38    10
39     0
dtype: int32

Frequency of each unique value of the said series:
21    1
17    2
41    2
82    1
59    3
45    2
50    1
57    1
28    1
36    1
22    1
63    1
18    1
39    1
38    1
79    1
1     1
35    1
30    1
23    1
90    1
16    1
54    1
5     1
89    1
95    1
70    1
34    1
74    1
67    1
81    1
84    1
46    1
10    1
0     1
dtype: int64


In [21]:
s = pd.Series(np.random.randint(1, 5, [15]))

most_frequent_value = s.value_counts().index[0]
s1 = s.apply(lambda x: 'Other' if x != most_frequent_value else x)

print("Original Series:")
print(s.iloc[:-2])
print("\nTop 2 Freq:")
print(s.value_counts())
print("\nModified Series:")
print(s1)

Original Series:
0     4
1     1
2     4
3     4
4     2
5     1
6     3
7     3
8     1
9     3
10    1
11    4
12    3
dtype: int32

Top 2 Freq:
4    4
1    4
3    4
2    3
dtype: int64

Modified Series:
0         4
1     Other
2         4
3         4
4     Other
5     Other
6     Other
7     Other
8     Other
9     Other
10    Other
11        4
12    Other
13    Other
14    Other
dtype: object


In [22]:
s = pd.Series(np.random.randint(1, 10, 9))

positions = [i for i, element in enumerate(s) if element % 5 == 0]

print("Original Series:")
print(s)
print("\nPositions of numbers that are multiples of 5:")
print(positions)

Original Series:
0    2
1    1
2    7
3    6
4    9
5    8
6    7
7    7
8    5
dtype: int32

Positions of numbers that are multiples of 5:
[8]


In [23]:
num_series = pd.Series(list('2390238923902390239023'))
element_pos = [0, 2, 6, 11, 21]
print("Original Series:")
print(num_series)
result = num_series.take(element_pos)
print("\nExtract items at given positions of the said series:")
print(result)

Original Series:
0     2
1     3
2     9
3     0
4     2
5     3
6     8
7     9
8     2
9     3
10    9
11    0
12    2
13    3
14    9
15    0
16    2
17    3
18    9
19    0
20    2
21    3
dtype: object

Extract items at given positions of the said series:
0     2
2     9
6     8
11    0
21    3
dtype: object


In [24]:
series1 = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
series2 = pd.Series([1, 3, 5, 7, 10])

print("Original Series:")
print("Series 1:\n",series1)
print("Series 2:\n",series2)

result = [pd.Index(series1).get_loc(i) for i in series2]
print("Positions of items of series2 in series1:")
print(result)

Original Series:
Series 1:
 0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
dtype: int64
Series 2:
 0     1
1     3
2     5
3     7
4    10
dtype: int64
Positions of items of series2 in series1:
[0, 2, 4, 6, 9]


In [25]:
series1 = pd.Series(['php', 'python', 'java', 'c#'])
print("Original Series:\n",series1)

result = series1.map(lambda x: x[0].upper() + x[1:-1] + x[-1].upper())

print("\nFirst and last character of each word to upper case:")
print(result)

Original Series:
 0       php
1    python
2      java
3        c#
dtype: object

First and last character of each word to upper case:
0       PhP
1    PythoN
2      JavA
3        C#
dtype: object


In [26]:
series1 = pd.Series(['Php', 'Python', 'Java', 'C#'])
print("Original Series:\n",series1)

result = series1.map(lambda x: len(x))

print("\nNumber of characters in each word in the said series:")
print(result)

Original Series:
 0       Php
1    Python
2      Java
3        C#
dtype: object

Number of characters in each word in the said series:
0    3
1    6
2    4
3    2
dtype: int64


In [27]:
series1 = pd.Series([1, 3, 5, 8, 10, 11, 15])
print("Original Series:")
print(series1)
print("\nDifference of differences between consecutive numbers of the said series:")
print(series1.diff().tolist())
print(series1.diff().diff().tolist())

Original Series:
0     1
1     3
2     5
3     8
4    10
5    11
6    15
dtype: int64

Difference of differences between consecutive numbers of the said series:
[nan, 2.0, 2.0, 3.0, 2.0, 1.0, 4.0]
[nan, nan, 0.0, 1.0, -1.0, -1.0, 3.0]


In [28]:
date_series = pd.Series(['01 Jan 2015', 
                         '10-02-2016', 
                         '20180307', 
                         '2014/05/06', 
                         '2016-04-12', 
                         '2019-04-06T11:20'])

print("Original Series:")
print(date_series)

print("\nSeries of date strings to a timeseries:")
print(pd.to_datetime(date_series))

Original Series:
0         01 Jan 2015
1          10-02-2016
2            20180307
3          2014/05/06
4          2016-04-12
5    2019-04-06T11:20
dtype: object

Series of date strings to a timeseries:
0   2015-01-01 00:00:00
1   2016-10-02 00:00:00
2   2018-03-07 00:00:00
3   2014-05-06 00:00:00
4   2016-04-12 00:00:00
5   2019-04-06 11:20:00
dtype: datetime64[ns]


In [29]:
date_series = pd.Series(['01 Jan 2015', 
                         '10-02-2016', 
                         '20180307', 
                         '2014/05/06', 
                         '2016-04-12', 
                         '2019-04-06T11:20'])

print("Original Series:")
print(date_series)

date_series = date_series.map(lambda x: parse(x))

print("\nDay of month:")
print(date_series.dt.day.tolist())

print("\nDay of year:")
print(date_series.dt.dayofyear.tolist())

print("\nWeek number:")
print(date_series.dt.isocalendar().week.tolist())

print("\nDay of week:")
print(date_series.dt.day_name().tolist())

Original Series:
0         01 Jan 2015
1          10-02-2016
2            20180307
3          2014/05/06
4          2016-04-12
5    2019-04-06T11:20
dtype: object

Day of month:
[1, 2, 7, 6, 12, 6]

Day of year:
[1, 276, 66, 126, 103, 96]

Week number:
[1, 39, 10, 19, 15, 14]

Day of week:
['Thursday', 'Sunday', 'Wednesday', 'Tuesday', 'Tuesday', 'Saturday']


In [30]:
date_series = pd.Series(['Jan 2015', 
                         'Feb 2016', 
                         'Mar 2017', 
                         'Apr 2018', 
                         'May 2019'])

print("Original Series:")
print(date_series)
print("\nNew dates:")
result = date_series.map(lambda d: parse('11 ' + d))
print(result)

Original Series:
0    Jan 2015
1    Feb 2016
2    Mar 2017
3    Apr 2018
4    May 2019
dtype: object

New dates:
0   2015-01-11
1   2016-02-11
2   2017-03-11
3   2018-04-11
4   2019-05-11
dtype: datetime64[ns]


In [31]:
color_series = pd.Series(['Red', 'Green', 'Orange', 'Pink', 'Yellow', 'White'])
print("Original Series:")
print(color_series)

print("\nFiltered words:")
result =color_series.map(lambda c: sum([Counter(c.lower()).get(i, 0) for i in list('aeiou')]) >= 2)
print(color_series[result])

Original Series:
0       Red
1     Green
2    Orange
3      Pink
4    Yellow
5     White
dtype: object

Filtered words:
1     Green
2    Orange
4    Yellow
5     White
dtype: object


In [32]:
x = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
y = pd.Series([11, 8, 7, 5, 6, 5, 3, 4, 7, 1])

print("Original series:")
print(x)
print(y)

print("\nEuclidean distance between two said series:")
print(np.linalg.norm(x-y))

Original series:
0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
dtype: int64
0    11
1     8
2     7
3     5
4     6
5     5
6     3
7     4
8     7
9     1
dtype: int64

Euclidean distance between two said series:
16.492422502470642


In [33]:
nums = pd.Series([1, 8, 7, 5, 6, 5, 3, 4, 7, 1])
print("Original series:")
print(nums)

print("\nPositions of the values surrounded by smaller values on both sides:")
temp = np.diff(np.sign(np.diff(nums)))
result = np.where(temp == -2)[0] + 1
print(result)

Original series:
0    1
1    8
2    7
3    5
4    6
5    5
6    3
7    4
8    7
9    1
dtype: int64

Positions of the values surrounded by smaller values on both sides:
[1 4 8]


In [34]:
str1 = 'abc def abcdef icd'
print("Original series:")
print(str1)

ser = pd.Series(list(str1))
element_freq = ser.value_counts()
print(element_freq)

current_freq = element_freq.dropna().index[-1]
result = "".join(ser.replace(' ', current_freq))
print(result)

Original series:
abc def abcdef icd
c    3
     3
d    3
a    2
b    2
e    2
f    2
i    1
dtype: int64
abcidefiabcdefiicd


In [35]:
num_series = pd.Series(np.arange(15) + np.random.normal(1, 10, 15))
print("Original series:")
print(num_series)

autocorrelations = [num_series.autocorr(i).round(2) for i in range(11)]
print("\nAutocorrelations of the said series:")
print(autocorrelations[1:])

Original series:
0     -1.362240
1     -2.703430
2      3.954793
3     24.734071
4     11.085042
5     17.786035
6    -11.391470
7      1.750378
8      3.253977
9     24.029926
10    22.394599
11    15.602720
12   -17.907786
13    16.003760
14    16.630147
dtype: float64

Autocorrelations of the said series:
[0.02, -0.27, -0.73, 0.02, 0.24, 0.81, -0.1, -0.16, -0.81, 0.08]


In [36]:
result = pd.Series(pd.date_range('2020-01-01', periods=52, freq='W-SUN'))
print("All Sundays of 2019:")
print(result)

All Sundays of 2019:
0    2020-01-05
1    2020-01-12
2    2020-01-19
3    2020-01-26
4    2020-02-02
5    2020-02-09
6    2020-02-16
7    2020-02-23
8    2020-03-01
9    2020-03-08
10   2020-03-15
11   2020-03-22
12   2020-03-29
13   2020-04-05
14   2020-04-12
15   2020-04-19
16   2020-04-26
17   2020-05-03
18   2020-05-10
19   2020-05-17
20   2020-05-24
21   2020-05-31
22   2020-06-07
23   2020-06-14
24   2020-06-21
25   2020-06-28
26   2020-07-05
27   2020-07-12
28   2020-07-19
29   2020-07-26
30   2020-08-02
31   2020-08-09
32   2020-08-16
33   2020-08-23
34   2020-08-30
35   2020-09-06
36   2020-09-13
37   2020-09-20
38   2020-09-27
39   2020-10-04
40   2020-10-11
41   2020-10-18
42   2020-10-25
43   2020-11-01
44   2020-11-08
45   2020-11-15
46   2020-11-22
47   2020-11-29
48   2020-12-06
49   2020-12-13
50   2020-12-20
51   2020-12-27
dtype: datetime64[ns]


In [37]:
char_list = list('ABCDEFGHIJKLMNOP')
num_arra = np.arange(8)
num_dict = dict(zip(char_list, num_arra))
num_ser = pd.Series(num_dict)
df = num_ser.to_frame().reset_index()
print(df.head())

  index  0
0     A  0
1     B  1
2     C  2
3     D  3
4     E  4


In [38]:
series1 = pd.Series(range(10))
series2 = pd.Series(list('pqrstuvwxy'))
print("Original Series:")
print(series1)
print(series2)

series1.append(series2)
df = pd.concat([series1, series2], axis=1)
print("\nStack two given series vertically and horizontally:")
print(df)

Original Series:
0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64
0    p
1    q
2    r
3    s
4    t
5    u
6    v
7    w
8    x
9    y
dtype: object

Stack two given series vertically and horizontally:
   0  1
0  0  p
1  1  q
2  2  r
3  3  s
4  4  t
5  5  u
6  6  v
7  7  w
8  8  x
9  9  y


  series1.append(series2)


In [39]:
nums1 = pd.Series([1, 8, 7, 5, 6, 5, 3, 4, 7, 1])
nums2 = pd.Series([1, 8, 7, 5, 6, 5, 3, 4, 7, 1])
print("Original Series:")
print(nums1)
print(nums2)

print("Check 2 series are equal or not?")
print(nums1 == nums2)

Original Series:
0    1
1    8
2    7
3    5
4    6
5    5
6    3
7    4
8    7
9    1
dtype: int64
0    1
1    8
2    7
3    5
4    6
5    5
6    3
7    4
8    7
9    1
dtype: int64
Check 2 series are equal or not?
0    True
1    True
2    True
3    True
4    True
5    True
6    True
7    True
8    True
9    True
dtype: bool


In [40]:
nums = pd.Series([1, 3, 7, 12, 88, 23, 3, 1, 9, 0])
print("Original Series:")
print(nums)

print("Index of the first occurrence of the smallest and largest value of the said series:")
print(nums.idxmin())
print(nums.idxmax())

Original Series:
0     1
1     3
2     7
3    12
4    88
5    23
6     3
7     1
8     9
9     0
dtype: int64
Index of the first occurrence of the smallest and largest value of the said series:
9
4


In [41]:
df_data = pd.DataFrame({'W':[68,75,86,80,None],'X':[78,75,None,80,86], 'Y':[84,94,89,86,86],'Z':[86,97,96,72,83]});
sr_data = pd.Series([68, 75, 86, 80, None]) 

print("Original DataFrame:")
print(df_data)

print("\nOriginal Series:")
print(sr_data)

print("\nCheck for inequality of the said series & dataframe:")
print(df_data.ne(sr_data, axis = 0))

Original DataFrame:
      W     X   Y   Z
0  68.0  78.0  84  86
1  75.0  75.0  94  97
2  86.0   NaN  89  96
3  80.0  80.0  86  72
4   NaN  86.0  86  83

Original Series:
0    68.0
1    75.0
2    86.0
3    80.0
4     NaN
dtype: float64

Check for inequality of the said series & dataframe:
       W      X     Y     Z
0  False   True  True  True
1  False  False  True  True
2  False   True  True  True
3  False  False  True  True
4   True   True  True  True
