In [1]:
import pandas as pd
import numpy as np

In [2]:
# Raw sales figures held in a plain Python list; the Series examples are built from it.
sales = [
    0, 5, 155, 0, 518,
    0, 1827, 616, 317, 325,
]
sales

[0, 5, 155, 0, 518, 0, 1827, 616, 317, 325]

In [3]:
# A pandas Series is a one-dimensional labelled array, comparable to a single
# column of a SQL table; `name` plays the role of the column name.
sales_series = pd.Series(data=sales, name="Sales")
sales_series

0       0
1       5
2     155
3       0
4     518
5       0
6    1827
7     616
8     317
9     325
Name: Sales, dtype: int64

In [4]:
# The underlying data as a NumPy array, without the index labels.
sales_series.values

array([   0,    5,  155,    0,  518,    0, 1827,  616,  317,  325],
      dtype=int64)

In [5]:
# The row labels; a default RangeIndex was created because no index was supplied.
sales_series.index

RangeIndex(start=0, stop=10, step=1)

In [6]:
# The label attached to the Series when it was constructed.
sales_series.name

'Sales'

In [7]:
# `name` is writable: assigning to it renames the Series in place, so every
# later display of sales_series shows the new name.
# NOTE(review): "out Sales" may be a typo for "our Sales" — confirm before
# changing it, since later outputs depend on this exact string.
sales_series.name = "out Sales"
sales_series.name

'out Sales'

In [8]:
# Element type of the Series; the list of Python ints was stored as int64.
sales_series.dtype

dtype('int64')

In [9]:
# A Series can also be built from a NumPy array of strings; with no index
# given, the labels default to 0..n-1.
name_series = pd.Series(
    data=np.array(['diamond', 'pearl', 'ruby', 'sapphire']),
)
name_series

0     diamond
1       pearl
2        ruby
3    sapphire
dtype: object

In [10]:
# Arithmetic mean, computed by the pandas Series method.
sales_series.mean()

376.3

In [11]:
# Same mean, but computed by NumPy on the underlying array instead of by pandas.
sales_series.values.mean()

376.3

In [12]:
# Replace the default 0-based index with the integer labels 1..4.
# This assignment mutates name_series in place.
name_series.index = [1, 2, 3, 4]
name_series

1     diamond
2       pearl
3        ruby
4    sapphire
dtype: object

# Combining more than one Series gives a DataFrame (DF).


In [13]:
# Take the 2nd and 3rd elements by position. The index holds the integer
# labels 1..4 at this point, so a bare name_series[1:3] is easy to misread as
# a label lookup; .iloc states explicitly that the slice is positional and
# end-exclusive, and returns the same rows.
name_series.iloc[1:3]

2    pearl
3     ruby
dtype: object

In [17]:
# Five integer values; they are paired one-to-one with the labels in `items`
# when the labelled Series is built.
inventory = [
    0,
    5,
    155,
    0,
    518,
]

In [18]:
# Text labels used as the custom index of the inventory Series.
items = [
    "coffee",
    "bananas",
    "tea",
    "coconut",
    "sugar",
]

In [19]:
# Build a Series whose index is the strings in `items` rather than the default
# 0..n-1, so values can be looked up by label.
inventory_series = pd.Series(
    data=inventory,
    index=items,
    name="sales",
)

In [20]:
# Display the Series: labels from `items` on the left, values from `inventory` on the right.
inventory_series

coffee       0
bananas      5
tea        155
coconut      0
sugar      518
Name: sales, dtype: int64

In [25]:
# Position-based slice: positions 2, 3 and 4 (the end position 5 is excluded).
inventory_series.iloc[2:5]

tea        155
coconut      0
sugar      518
Name: sales, dtype: int64

In [26]:
# A list of positions selects exactly those elements (here positions 1 and 4).
inventory_series.iloc[[1, 4]]

bananas      5
sugar      518
Name: sales, dtype: int64

In [27]:
# Label-based lookup: a single label returns the scalar value stored under it.
inventory_series.loc["sugar"]

518

In [29]:
# Label-based slice: unlike positional slicing, BOTH end labels are included.
inventory_series.loc["bananas":"coconut"]

bananas      5
tea        155
coconut      0
Name: sales, dtype: int64

In [32]:
# A list of labels selects exactly those elements.
inventory_series.loc[["tea", "sugar"]]

tea      155
sugar    518
Name: sales, dtype: int64

In [33]:
# Move the index labels into a regular column named "index"; the result is a
# DataFrame with a fresh 0..n-1 index. inventory_series itself is not modified.
inventory_series.reset_index()

Unnamed: 0,index,sales
0,coffee,0
1,bananas,5
2,tea,155
3,coconut,0
4,sugar,518


In [34]:
# drop=True discards the old labels instead of keeping them as a column, so
# the result stays a Series with a 0..n-1 index.
inventory_series.reset_index(drop=True)

0      0
1      5
2    155
3      0
4    518
Name: sales, dtype: int64

In [35]:
# Boolean-mask filtering: the comparison yields True/False per element and
# only the elements marked True are kept.
inventory_series[inventory_series > 100]

tea      155
sugar    518
Name: sales, dtype: int64

In [36]:
# Sort by value, smallest first (the default); each label stays with its value.
inventory_series.sort_values()

coffee       0
coconut      0
bananas      5
tea        155
sugar      518
Name: sales, dtype: int64

In [37]:
# Sort by value, largest first.
inventory_series.sort_values(ascending=False)

sugar      518
tea        155
bananas      5
coconut      0
coffee       0
Name: sales, dtype: int64

In [38]:
# Re-display sales_series; it carries the name assigned earlier through the `name` attribute.
sales_series

0       0
1       5
2     155
3       0
4     518
5       0
6    1827
7     616
8     317
9     325
Name: out Sales, dtype: int64

In [42]:
# Render each value as a dollar string: int -> float -> str, then prefix "$".
# The concatenation is applied element-wise across the whole Series.
"$" + (
    sales_series
    .astype("float")
    .astype("str")
)

0       $0.0
1       $5.0
2     $155.0
3       $0.0
4     $518.0
5       $0.0
6    $1827.0
7     $616.0
8     $317.0
9     $325.0
Name: out Sales, dtype: object

In [45]:
# Five daily readings with a missing value on day 2.
# np.nan is the canonical spelling of NaN; the np.NaN alias was removed in
# NumPy 2.0 and raises AttributeError there.
my_series = pd.Series(
    [1, np.nan, 2, 3, 4],
    index=["day 1", "day 2", "day 3", "day 4", "day 5"],
)

In [46]:
# The missing value forces a float64 dtype, so the integers display as 1.0, 2.0, ...
my_series

day 1    1.0
day 2    NaN
day 3    2.0
day 4    3.0
day 5    4.0
dtype: float64

In [47]:
# Element-wise addition with the + operator; the missing value stays NaN.
my_series + 1

day 1    2.0
day 2    NaN
day 3    3.0
day 4    4.0
day 5    5.0
dtype: float64

In [48]:
# .add with fill_value=0 substitutes 0 for the missing value before adding,
# so day 2 becomes 1.0 instead of NaN.
my_series.add(1, fill_value = 0)

day 1    2.0
day 2    1.0
day 3    3.0
day 4    4.0
day 5    5.0
dtype: float64

In [49]:
# String data used by the `.str` accessor examples.
text_series = pd.Series(
    data=["day 1", "day 2", "day 3", "day 4", "day 5"],
)
text_series

0    day 1
1    day 2
2    day 3
3    day 4
4    day 5
dtype: object

In [52]:
# Element-wise test: True where the string contains "1".
text_series.str.contains('1')

0     True
1    False
2    False
3    False
4    False
dtype: bool

In [53]:
# Upper-case every string; the result is a new Series.
text_series.str.upper()

0    DAY 1
1    DAY 2
2    DAY 3
3    DAY 4
4    DAY 5
dtype: object

In [56]:
# .str methods chain: upper-case first, then test the upper-cased text for "DAY 1".
text_series.str.upper().str.contains('DAY 1')

0     True
1    False
2    False
3    False
4    False
dtype: bool

In [58]:
# Remove the literal "day " prefix, leaving the day number as text.
# str.strip('day ') treats its argument as a SET of characters ('d', 'a', 'y',
# ' ') and trims them from both ends, which only looks like prefix removal for
# this particular data. Anchoring the pattern with ^ removes exactly one
# leading "day " and nothing else.
text_series.str.replace(r'^day ', '', regex=True)

0    1
1    2
2    3
3    4
4    5
dtype: object

In [59]:
# Remove the literal "day " prefix and convert the remaining digits to integers.
# str.strip('day ') treats its argument as a SET of characters trimmed from
# both ends, so it only resembles prefix removal for this data; the anchored
# replacement removes exactly one leading "day ".
text_series.str.replace(r'^day ', '', regex=True).astype('int')

0    1
1    2
2    3
3    4
4    5
dtype: int32

In [61]:
# Take the last character of each string and convert it to an integer.
# NOTE(review): this reads a single character, so it is only correct while the
# day number has one digit.
text_series.str[-1].astype('int')

0    1
1    2
2    3
3    4
4    5
dtype: int32

In [66]:
# Split each string on a space; expand=True returns a DataFrame with one
# column per piece.
text_series.str.split(' ', expand = True)

Unnamed: 0,0,1
0,day,1
1,day,2
2,day,3
3,day,4
4,day,5


In [72]:
# Aggregation
# Load the data used by the aggregation examples. The path is relative, so
# transactions.csv must be in the notebook's working directory.
transactions = pd.read_csv("transactions.csv")

In [75]:
# Selecting a single column from a DataFrame already yields a Series, so no
# pd.Series(...) wrapper is needed.
transactions_series = transactions["transactions"]

In [77]:
# .loc slices by label and includes the end label, so 0:9 returns ten rows.
transactions_series.loc[0:9]

0     770
1    2111
2    2358
3    3487
4    1922
5    1903
6    2143
7    1874
8    3250
9    2940
Name: transactions, dtype: int64

In [78]:
# Number of non-missing values in the Series.
transactions_series.count()

83488

In [79]:
# Total of all values.
transactions_series.sum()

141478945

In [80]:
# Product of all values.
# NOTE(review): the 0 shown is not necessarily meaningful. Multiplying tens of
# thousands of int64 values overflows silently, and the wrapped result can be
# exactly 0 even when no element is 0 — verify with (transactions_series == 0).any().
transactions_series.product()

0

In [81]:
# Arithmetic mean of all values.
transactions_series.mean()

1694.6021583940208

In [86]:
# Median via the 0.5 quantile. Passing a list returns a Series indexed by the
# requested quantiles rather than a bare number.
transactions_series.quantile([0.5])

0.5    1393.0
Name: transactions, dtype: float64

In [87]:
# Number of distinct values.
transactions_series.nunique()

4993

In [90]:
# How often each distinct value occurs, most frequent first. The result has
# one row per distinct value, so the display is truncated.
transactions_series.value_counts()

1207    90
1200    86
1304    81
1296    80
1282    80
1171    80
1229    79
1251    78
1306    77
1322    77
1244    76
1341    76
1297    76
1308    76
1277    76
1288    75
1152    75
1202    75
1326    75
1178    75
1373    74
1179    74
1263    74
1350    74
1122    74
1231    73
1250    73
1293    73
1324    73
1237    73
        ..
3929     1
7093     1
5369     1
5150     1
5299     1
4951     1
4980     1
4459     1
5492     1
5416     1
4793     1
5293     1
5684     1
4959     1
4452     1
5620     1
5391     1
5572     1
5466     1
5508     1
4718     1
7502     1
5444     1
5335     1
5214     1
5380     1
5722     1
5556     1
5316     1
4005     1
Name: transactions, Length: 4993, dtype: int64

In [91]:
# The distinct values themselves, returned as a NumPy array.
transactions_series.unique()

array([ 770, 2111, 2358, ..., 4553, 4400, 4392], dtype=int64)

In [92]:
# Small text Series with one repeated value ('coffee') for the
# uniqueness/frequency examples.
items_series = pd.Series(
    data=['coffee', 'coffee', 'tea', 'coconut', 'sugar'],
)
items_series

0     coffee
1     coffee
2        tea
3    coconut
4      sugar
dtype: object

In [93]:
# Number of distinct values: 'coffee' appears twice but is counted once.
items_series.nunique()

4

In [94]:
# The distinct values themselves, as a NumPy array.
items_series.unique()

array(['coffee', 'tea', 'coconut', 'sugar'], dtype=object)

In [97]:
# Number of non-missing entries, duplicates included.
items_series.count()

5

In [100]:
# How many times each distinct value occurs, most frequent first.
items_series.value_counts()

coffee     2
coconut    1
tea        1
sugar      1
dtype: int64

In [101]:
# normalize=True returns each value's share of the total instead of a raw count.
items_series.value_counts(normalize = True)

coffee     0.4
coconut    0.2
tea        0.2
sugar      0.2
dtype: float64