# pandas

### series

In [43]:
# Creating an empty Series.
# The dtype is passed explicitly: calling pd.Series() with no dtype emits a
# deprecation warning on pandas 1.x and produces dtype 'object' on pandas 2.x,
# so the float64 result shown below would otherwise not be reproducible.
import pandas as pd
ser = pd.Series(dtype="float64")  # empty series
ser

Series([], dtype: float64)

In [44]:
# 1. Building a Series on top of a NumPy ndarray
#    (a default integer index 0..n-1 is generated for the values)
import numpy as np
arr = np.asarray([11, 12, 13, 14])
ser = pd.Series(data=arr)
print(ser)

0    11
1    12
2    13
3    14
dtype: int64


In [45]:
# 2. Building a Series from a plain Python list
#    (same result as the ndarray version: values plus a 0..n-1 index)
my_list = [
    11, 12, 13, 14,
]
ser = pd.Series(data=my_list)
print(ser)


0    11
1    12
2    13
3    14
dtype: int64


In [46]:
ser.loc[0]  # look up a value by its index label

11

In [47]:
# 3. Building a Series from a dictionary
#    (each dict key becomes an index label, each dict value a Series value)
my_dict = dict(a=100, b=200, c=300)
ser = pd.Series(data=my_dict)
print(ser)


a    100
b    200
c    300
dtype: int64


In [48]:
ser.loc['a']  # value stored under the index label 'a'

100

In [49]:
ser.loc['c']  # value stored under the index label 'c'

300

In [50]:
# The element type of a Series can be chosen with the `dtype` argument;
# here the values are stored as 16-bit integers.
my_list = [11, 12, 13, 14]
ser = pd.Series(data=my_list, dtype="int16")
print(ser)

0    11
1    12
2    13
3    14
dtype: int16


In [51]:
# Slicing a Series by position: the start is included, the stop is excluded,
# so 1:4 selects the elements at positions 1, 2 and 3.
my_list = [11, 12, 13, 14, 15, 16]
ser = pd.Series(data=my_list, dtype="int16")
print(ser.iloc[1:4])

1    12
2    13
3    14
dtype: int16


# DataFrame

In [52]:
# Build a single-column DataFrame from a flat list.
# pandas is imported under the `pd` alias because that is the name this cell
# uses; a bare `import pandas` does not define `pd`, so the cell would only
# run when an earlier cell had already created the alias.
import pandas as pd

my_list = [11, 12, 13, 14, 15, 16]

# The single unnamed column gets the default label 0.
df = pd.DataFrame(my_list)
print(df)

    0
0  11
1  12
2  13
3  14
4  15
5  16


In [53]:
# DataFrame with more than one column, built from a list of dicts.
# Each dict key turns into a column name; a key that is absent from a
# record leaves NaN (Not a Number) in that record's row.

my_dict = [
    dict(a=1, b=2, c=3),
    dict(a=11, b=22),
]

df = pd.DataFrame(data=my_dict)
print(df)



    a   b    c
0   1   2  3.0
1  11  22  NaN


In [54]:
# Replacing the default row labels (0, 1, ...) with custom ones:
# pass the desired labels through the `index` parameter.
my_dict = [
    dict(a=1, b=2, c=3),
    dict(a=11, c=22),
]

row_labels = ['first', 'second']
df = pd.DataFrame(data=my_dict, index=row_labels)
print(df)

         a    b   c
first    1  2.0   3
second  11  NaN  22


In [55]:
# Turning a dict of Series into a DataFrame:
#   - the dict keys become the column names
#   - the index labels of the Series become the row labels; a Series that
#     has no value for a label gets NaN in that row
import pandas

data = {
    'one': pandas.Series([1, 2, 3], index=list('abc')),
    'two': pandas.Series([1, 2, 3, 4], index=list('abcd')),
}

table = pandas.DataFrame(data=data)
print(table)

   one  two
a  1.0    1
b  2.0    2
c  3.0    3
d  NaN    4


# DataFrame - Column addition

In [56]:
import pandas

# Starting table for the column examples: two columns built from Series
# of different lengths, so column 'one' has no value for row 'd'.
data = dict(
    one=pandas.Series([1, 2, 3], index=['a', 'b', 'c']),
    two=pandas.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd']),
)

table = pandas.DataFrame(data=data)
print(table)

   one  two
a  1.0    1
b  2.0    2
c  3.0    3
d  NaN    4


In [57]:
# select a single column by name; the result is a Series
table.loc[:, 'one']

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

In [58]:
# Add a new column called 'three' from a Series.
# The Series only has labels 'a' and 'b', so the remaining rows get NaN.
table['three'] = pandas.Series({'a': 11, 'b': 12})
table

Unnamed: 0,one,two,three
a,1.0,1,11.0
b,2.0,2,12.0
c,3.0,3,
d,,4,


In [59]:
# add a column called 'four' from a plain list: the values are assigned
# to the rows in order (first value to the first row, and so on)
table['four'] = [100, 200, 300, 400]
table

Unnamed: 0,one,two,three,four
a,1.0,1,11.0,100
b,2.0,2,12.0,200
c,3.0,3,,300
d,,4,,400


# column deletion

In [60]:
# remove the column 'one'; `del` modifies `table` itself
del table['one']
table

Unnamed: 0,two,three,four
a,1,11.0,100
b,2,12.0,200
c,3,,300
d,4,,400


In [61]:
# pop() is another way to delete a column: it removes the column from
# `table` and returns the removed column as a Series
table.pop('two')

a    1
b    2
c    3
d    4
Name: two, dtype: int64

# DataFrame - Row selection

In [62]:
# show the table that the row-selection examples operate on
table

Unnamed: 0,three,four
a,11.0,100
b,12.0,200
c,,300
d,,400


In [63]:
# access a row by its index label with the .loc[] indexer
# (square brackets, not a function call); the row comes back as a Series
table.loc['c']

three      NaN
four     300.0
Name: c, dtype: float64

In [64]:
# same lookup for the row labelled 'a'
table.loc['a']

three     11.0
four     100.0
Name: a, dtype: float64

In [65]:
# access a row by its integer position (0-based) instead of its label:
# use the .iloc[] indexer with square brackets
table.iloc[0] # first row

three     11.0
four     100.0
Name: a, dtype: float64

In [66]:
table.iloc[1]  # second row (position 1; its label is 'b')

three     12.0
four     200.0
Name: b, dtype: float64

# DataFrame - Row addition

In [67]:
import pandas

# Rebuild the example table from scratch: columns 'one' and 'two' come from
# Series of different lengths, then 'three' is added with values for rows
# 'a' and 'b' only.
data = dict(
    one=pandas.Series([1, 2, 3], index=list('abc')),
    two=pandas.Series([1, 2, 3, 4], index=list('abcd')),
)

table = pandas.DataFrame(data=data)
table['three'] = pandas.Series({'a': 11, 'b': 12})
print(table)

   one  two  three
a  1.0    1   11.0
b  2.0    2   12.0
c  3.0    3    NaN
d  NaN    4    NaN


In [68]:
# Add rows: attach the rows of a second DataFrame to the end of `table`.
# DataFrame.append() was deprecated in pandas 1.4 and removed in pandas 2.0,
# so pandas.concat() is used instead; it produces the same result.
# sort=True orders the combined columns alphabetically, and a column that one
# of the frames lacks ('one' for the new rows) is filled with NaN.
row = pandas.DataFrame([[10, 20], [30, 40]], columns=['two', 'three'])
print("Row that is going to be added")
print(row)
table = pandas.concat([table, row], sort=True)
print("Dataframe after row addition")
print(table)

Row that is going to be added
   two  three
0   10     20
1   30     40
Dataframe after row addition
   one  three  two
a  1.0   11.0    1
b  2.0   12.0    2
c  3.0    NaN    3
d  NaN    NaN    4
0  NaN   20.0   10
1  NaN   40.0   30


# Row deletion

In [69]:
# Remove the row whose index label is 'a'.
# The result of drop() is assigned back to `table`.
table = table.drop(index='a')
print(table)

   one  three  two
b  2.0   12.0    2
c  3.0    NaN    3
d  NaN    NaN    4
0  NaN   20.0   10
1  NaN   40.0   30


# Loading CSV data into DataFrames

In [71]:
import os
import pandas

# Directory that holds the course datasets. A hard-coded absolute path from
# one user's machine makes the notebook unrunnable anywhere else, so the
# location can be overridden with the BIGMART_DATA_DIR environment variable;
# the original location remains the default.
data_dir = os.environ.get(
    'BIGMART_DATA_DIR',
    '/Users/gyanendra/Desktop/PyDataScience_venv/Module4/574_m4_datasets_v3.0',
)
file_path = os.path.join(data_dir, 'BigMartSalesData.csv')

# read_csv parses the file into a DataFrame; the header line supplies the
# column names.
dataframe = pandas.read_csv(file_path)
dataframe

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,UnitPrice,Amount,InvoiceDate,Day,Month,Year,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2.55,15.30,01-12-10,1,12,2010,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,3.39,20.34,01-12-10,1,12,2010,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2.75,22.00,01-12-10,1,12,2010,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,3.39,20.34,01-12-10,1,12,2010,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,3.39,20.34,01-12-10,1,12,2010,17850.0,United Kingdom
5,536365,22752,SET 7 BABUSHKA NESTING BOXES,2,7.65,15.30,01-12-10,1,12,2010,17850.0,United Kingdom
6,536365,21730,GLASS STAR FROSTED T-LIGHT HOLDER,6,4.25,25.50,01-12-10,1,12,2010,17850.0,United Kingdom
7,536366,22633,HAND WARMER UNION JACK,6,1.85,11.10,01-12-10,1,12,2010,17850.0,United Kingdom
8,536366,22632,HAND WARMER RED POLKA DOT,6,1.85,11.10,01-12-10,1,12,2010,17850.0,United Kingdom
9,536367,84879,ASSORTED COLOUR BIRD ORNAMENT,32,1.69,54.08,01-12-10,1,12,2010,13047.0,United Kingdom
