# Data Series Creation in Pandas

In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math as math
import random as rnd

In [2]:
# Day names in calendar order, with the week starting on Monday.
weekdays = [
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
    'Sunday',
]

In [4]:
# Wrap the list in a Series; no index is given, so the rows get the default 0..6 labels.
weekdaysSeries = pd.Series(data=weekdays)
weekdaysSeries

0       Monday
1      Tuesday
2    Wednesday
3     Thursday
4       Friday
5     Saturday
6       Sunday
dtype: object

In [5]:
# One flag per weekday in Monday-first order: five working days, then the two-day weekend.
freeDays = [False] * 5 + [True] * 2

In [6]:
# A list of Python booleans is stored with the compact bool dtype.
freeDaysSeries = pd.Series(data=freeDays)
freeDaysSeries

0    False
1    False
2    False
3    False
4    False
5     True
6     True
dtype: bool

In [7]:
# Holiday name -> date written as MM-DD.
holidays = {
    'New Year': '01-01',
    'Independence Day': '07-04',
    'Christmas': '12-25',
}

In [8]:
# Building a Series from a dict turns the keys into index labels and the values into data.
holidaysSeries = pd.Series(data=holidays)
holidaysSeries

New Year            01-01
Independence Day    07-04
Christmas           12-25
dtype: object

# Data Series attributes

In [19]:
# Use a dedicated, seeded generator so the sample is identical after every
# kernel restart and the module-level random state is left alone.
floatRng = rnd.Random(42)

# 100001 floats: element i is i scaled by a uniform draw from [0, 1),
# so element 0 is always 0.0.
dataAsFloatList = [i * floatRng.random() for i in range(100001)]
dataAsFloatSeries = pd.Series(dataAsFloatList)

In [21]:
# Total number of elements stored in the series.
dataAsFloatSeries.size


100001

In [22]:
# Bytes occupied by the values: 100001 elements x 8 bytes per float64.
dataAsFloatSeries.nbytes

800008

In [23]:
# Shape as a tuple; a Series is one-dimensional, so the tuple has a single entry.
dataAsFloatSeries.shape

(100001,)

In [34]:
# Data type of the stored values.
dataAsFloatSeries.dtypes

dtype('float64')

In [24]:
# List of axis labels; a Series only has the row index, hence a one-element list.
dataAsFloatSeries.axes

[RangeIndex(start=0, stop=100001, step=1)]

In [25]:
# Row labels; a default RangeIndex because no index was passed when the series was built.
dataAsFloatSeries.index

RangeIndex(start=0, stop=100001, step=1)

In [26]:
# True when no value occurs more than once in the series.
dataAsFloatSeries.is_unique

True

In [27]:
# True only if every value is >= the one before it; the random scaling breaks that ordering here.
dataAsFloatSeries.is_monotonic_increasing

False

In [29]:
# True only if every value is <= the one before it; also False for this randomly scaled data.
dataAsFloatSeries.is_monotonic_decreasing

False

In [30]:
# Use a dedicated, seeded generator so the sample is identical after every
# kernel restart and the module-level random state is left alone.
stringRng = rnd.Random(42)

# Same kind of data as the float example, but each number is stored as its
# string representation so the series holds Python str objects.
dataAsStringList = [str(i * stringRng.random()) for i in range(100001)]
dataasStringSeries = pd.Series(dataAsStringList)

In [31]:
# Total number of elements stored in the series.
dataasStringSeries.size

100001

In [32]:
# For an object-dtype series this counts only the 8-byte references to the Python
# strings, not the characters themselves, which is why the figure equals that of the
# float series. Use memory_usage(deep=True) to measure the real footprint.
dataasStringSeries.nbytes

800008

In [33]:
# Strings are held as generic Python objects, reported as dtype 'O'.
dataasStringSeries.dtypes

dtype('O')

# Data Series methods

In [37]:
# Parallel lists: population[i] is the head count for cities[i].
cities = [
    'New York',
    'Los Angeles',
    'Chicago',
]
population = [
    8_419_600,
    3_980_400,
    2_716_000,
]

In [38]:
# Population values labelled by city name instead of the default integer positions.
citypop = pd.Series(population, index=cities)
citypop

New York       8419600
Los Angeles    3980400
Chicago        2716000
dtype: int64

In [40]:
# Average population across the cities, rounded to a whole number.
round(citypop.mean())

5038667

In [41]:
# Combined population of all cities; the result is a NumPy integer scalar.
citypop.sum()

np.int64(15116000)

In [42]:
# The labels of the series: the city names passed as index at construction.
citypop.index

Index(['New York', 'Los Angeles', 'Chicago'], dtype='object')

In [43]:
# keys() returns the same Index object contents as the .index attribute.
citypop.keys()

Index(['New York', 'Los Angeles', 'Chicago'], dtype='object')

In [46]:
# The data without its labels, as a NumPy array.
citypop.values

array([8419600, 3980400, 2716000])

# Data Series filtering

In [47]:
# Incident counts per age bracket; the bracket label becomes the index of the series.
age = [
    "less than 6",
    "7-14",
    "15-17",
    "18-24",
    "25-39",
    "40-59",
    "more than 60",
]
values = [14, 334, 312, 5823, 9491, 7486, 4343]
incidentsSeries = pd.Series(values, index=age)
incidentsSeries

less than 6       14
7-14             334
15-17            312
18-24           5823
25-39           9491
40-59           7486
more than 60    4343
dtype: int64

In [54]:
# where() replaces every count that is not above 1000 with NaN and dropna() then
# removes those rows. The NaN step is what turns the result into float64.
newIncidentsSeries = (
    incidentsSeries
    .where(incidentsSeries > 1000)
    .dropna()
)
# Both calls return new objects, so incidentsSeries itself is left unchanged.
print(newIncidentsSeries)
print(incidentsSeries)

18-24           5823.0
25-39           9491.0
40-59           7486.0
more than 60    4343.0
dtype: float64
less than 6       14
7-14             334
15-17            312
18-24           5823
25-39           9491
40-59           7486
more than 60    4343
dtype: int64


In [55]:
# filter() selects rows by index label (not by value) and returns a new series.
incidentsSeries.filter(items=['18-24', '25-39', '40-59'])

18-24    5823
25-39    9491
40-59    7486
dtype: int64

In [57]:
# Unlike the non-mutating where() call shown earlier, inplace=True rewrites
# incidentsSeries itself: counts above 1000 become NaN, which also turns the
# values into floats.
incidentsSeries.where(incidentsSeries <= 1000, inplace=True)
# Remove the NaN rows, again modifying the same object instead of returning a copy.
# After this cell the larger age groups are gone; rebuild the series before
# re-running any earlier cell that expects the full data.
incidentsSeries.dropna(inplace=True)
incidentsSeries

less than 6     14.0
7-14           334.0
15-17          312.0
dtype: float64

In [60]:
# Three parallel lists: position i in each list describes the same country.
# NOTE(review): the notebook does not state the unit or source of the energy figures - confirm.
namesList = [
    'Albania', 'Austria', 'Belarus', 'Belgium', 'Bulgaria', 'Croatia',
    'Cyprus', 'Czech Republic', 'Denmark', 'Estonia', 'Finland', 'France',
    'Germany', 'Greece', 'Hungary', 'Iceland', 'Ireland', 'Italy',
    'Latvia', 'Lithuania', 'Luxembourg', 'Macedonia', 'Malta', 'Montenegro',
    'Netherlands', 'Norway', 'Poland', 'Portugal', 'Romania', 'Russia',
    'Serbia', 'Slovenia', 'Spain', 'Sweden', 'Switzerland', 'United Kingdom',
    'Turkey', 'Ukraine',
]
energy2010List = [
    1947, 8347, 3564, 8369, 4560, 3814, 4623, 6348, 6328, 6506,
    16483, 7736, 7264, 5318, 3876, 51440, 5911, 5494, 3230, 3471,
    16830, 3521, 4171, 5420, 7010, 24891, 3797, 4959, 2551, 6410,
    4359, 6521, 5707, 14934, 8175, 2498, 3550, 5701,
]
energy2012List = [
    2118, 8507, 3698, 7987, 4762, 3819, 4057, 6305, 6039, 6689,
    15687, 7344, 7270, 5511, 3919, 53203, 5665, 5398, 3588, 3608,
    14696, 3626, 4761, 5416, 6871, 23658, 3899, 4736, 2604, 6617,
    4387, 6778, 5573, 14290, 7886, 2794, 3641, 5452,
]

# The series built below are matched to each other purely by position, so a
# missing or extra entry in any list would silently pair values with the wrong
# country. Fail loudly instead.
assert len(namesList) == len(energy2010List) == len(energy2012List), \
    "namesList, energy2010List and energy2012List must have the same length"

# All three series share the default 0..n-1 index, which is what lets boolean
# masks computed on the energy series be applied to nameSeries.
nameSeries = pd.Series(namesList)
energy2010Series = pd.Series(energy2010List)
energy2012Series = pd.Series(energy2012List)

In [62]:
# Arithmetic mean of all 2010 values; used below as the threshold for the 2010 masks.
mean2010 = energy2010Series.mean()
mean2010

np.float64(7779.8421052631575)

In [63]:
# Arithmetic mean of all 2012 values; used below as the threshold for the 2012 mask.
mean2012 = energy2012Series.mean()
mean2012

np.float64(7706.815789473684)

In [69]:
# Boolean mask with one entry per row: True where the 2010 value is strictly above the 2010 mean.
filterAboveMean2010 = energy2010Series.gt(mean2010)
filterAboveMean2010

0     False
1      True
2     False
3      True
4     False
5     False
6     False
7     False
8     False
9     False
10     True
11    False
12    False
13    False
14    False
15     True
16    False
17    False
18    False
19    False
20     True
21    False
22    False
23    False
24    False
25     True
26    False
27    False
28    False
29    False
30    False
31    False
32    False
33     True
34     True
35    False
36    False
37    False
dtype: bool

In [70]:
# Boolean mask with one entry per row: True where the 2012 value is strictly above the 2012 mean.
filterAboveMean2012 = energy2012Series.gt(mean2012)
filterAboveMean2012

0     False
1      True
2     False
3      True
4     False
5     False
6     False
7     False
8     False
9     False
10     True
11    False
12    False
13    False
14    False
15     True
16    False
17    False
18    False
19    False
20     True
21    False
22    False
23    False
24    False
25     True
26    False
27    False
28    False
29    False
30    False
31    False
32    False
33     True
34     True
35    False
36    False
37    False
dtype: bool

In [73]:
# Names of the rows that were above the mean in both 2010 and 2012: rows failing
# the combined mask become NaN and are then dropped.
(
    nameSeries
    .where(filterAboveMean2010 & filterAboveMean2012)
    .dropna()
)

1         Austria
3         Belgium
10        Finland
15        Iceland
20     Luxembourg
25         Norway
33         Sweden
34    Switzerland
dtype: object

In [75]:
# Boolean mask with one entry per row: True where the 2010 value is strictly below the 2010 mean.
filterBeloweMean2010 = energy2010Series.lt(mean2010)
filterBeloweMean2010

0      True
1     False
2      True
3     False
4      True
5      True
6      True
7      True
8      True
9      True
10    False
11     True
12     True
13     True
14     True
15    False
16     True
17     True
18     True
19     True
20    False
21     True
22     True
23     True
24     True
25    False
26     True
27     True
28     True
29     True
30     True
31     True
32     True
33    False
34    False
35     True
36     True
37     True
dtype: bool

In [76]:
# Names of the rows that were below the 2010 mean but above the 2012 mean.
# An empty result means no row crossed from below to above between the two years.
(
    nameSeries
    .where(filterBeloweMean2010 & filterAboveMean2012)
    .dropna()
)

Series([], dtype: object)