This notebook contains **Python code** examples covering the basics of pandas.

- How to import Pandas
- How to create Pandas Series and DataFrames using various methods
- How to access and change elements in Series and DataFrames
- How to perform arithmetic operations on Series
- How to load data into a DataFrame
- How to deal with Not a Number (NaN) values

In [1]:
# Import pandas and build a labelled Series holding mixed value types
import pandas as pd

grocery_labels = ['eggs', 'apples', 'milk', 'bread']
grocery_values = [30, 6, 'Yes', 'No']
groceries = pd.Series(grocery_values, index=grocery_labels)
print(groceries)

# Structural attributes, printed in order: shape, ndim, size, index, values
for attribute in ('shape', 'ndim', 'size', 'index', 'values'):
    print(getattr(groceries, attribute))

# `in` on a Series tests membership among the index labels, not the values
for label in ('banana', 'eggs'):
    print(label in groceries)

In [2]:
# Access / modify elements
# Label-based access: one label returns the element, a list of labels
# returns a sub-Series.
print(groceries['eggs'])
print(groceries[['eggs', 'milk']])
# Position-based access goes through .iloc. Plain `groceries[0]` on a
# label-indexed Series relies on a deprecated positional fallback of [].
print(groceries.iloc[0])
print(groceries.iloc[-1])
print(groceries.iloc[[0, 2]])
print()
# .loc is the explicit label-based accessor, .iloc the explicit positional one
print(groceries.loc[['apples', 'bread']])
print(groceries.iloc[[2, 3]])
print()
# Assigning through a label changes the element in place
groceries['eggs'] = 4
print(groceries)
print()
# drop() without inplace returns a new Series and leaves `groceries` untouched
print(groceries.drop('eggs'))
# With inplace=True, drop() mutates `groceries` and returns None, so the
# Series itself is printed afterwards instead of the (None) return value.
groceries.drop('eggs', inplace=True)
print(groceries)

In [3]:
# Arithmetic operations: a scalar operand is applied to every element
fruits = pd.Series([10, 6, 3], index=['apples', 'oranges', 'bananas'])
print(fruits)

# Subtraction, addition, multiplication and division, in that order
for result in (fruits - 2, fruits + 2, fruits * 2, fruits / 2):
    print(result)

print(fruits.dtype)

In [4]:
import numpy as np

# NumPy functions applied to the Series: square root and exponential first,
# then every element raised to the power 2.
for unary_function in (np.sqrt, np.exp):
    print(unary_function(fruits))
print(np.power(fruits, 2))

In [5]:
# Arithmetic on a single element selected by label
banana_count = fruits['bananas']
print(banana_count + 2)

# Arithmetic on a sub-Series selected by a list of labels,
# once through plain [] and once through .loc
selected_fruits = ['apples', 'bananas']
print(fruits[selected_fruits] * 2)
print(fruits.loc[selected_fruits] / 2)


In [6]:
# Distance of each planet from the Sun, one value per entry in `planets`.
# NOTE(review): units are not stated here; presumably millions of km -- confirm.
planets = ['Earth', 'Saturn', 'Mars', 'Venus', 'Jupiter']
distance_from_sun = [149.6, 1433.5, 227.9, 108.2, 778.6]

Distance = pd.Series(distance_from_sun, index=planets)
print(Distance)

# Divide every distance by the constant 18.
# NOTE(review): 18 presumably approximates the speed of light in million km
# per minute, which would make `time_taken` minutes of light travel -- confirm.
time_taken = Distance.div(18)
print(time_taken)

# Boolean-mask selection: keep only the entries whose value is below 40
is_close = time_taken.lt(40)
close_planets = time_taken[is_close]
print(close_planets)

In [7]:
# Creating a pandas DataFrame from a dictionary of Series
import pandas as pd

bob_cart = pd.Series([245, 25, 55], index=['bike', 'pants', 'watch'])
alice_cart = pd.Series([40, 110, 500, 45], index=['book', 'glasses', 'bike', 'pants'])

# Each dictionary key becomes a column; the rows are the union of the Series'
# index labels, with NaN where a Series has no entry for a label.
items = {'Bob': bob_cart, 'Alice': alice_cart}
shopping_carts = pd.DataFrame(items)
print(shopping_carts)

# Structural attributes, printed in order: columns, values, shape, ndim, size
for attribute in ('columns', 'values', 'shape', 'ndim', 'size'):
    print(getattr(shopping_carts, attribute))

In [8]:
# Build narrower DataFrames from the same `items` dictionary by naming the
# columns and/or row labels to keep at construction time.
kept_rows = ['pants', 'watch']

# Only the 'Bob' column
bob = pd.DataFrame(items, columns=['Bob'])
print(bob)

# Both columns, restricted to the selected row labels
sell = pd.DataFrame(items, index=kept_rows)
print(sell)

# Only the 'Alice' column, restricted to the selected row labels
alice = pd.DataFrame(items, columns=['Alice'], index=kept_rows)
print(alice)

In [9]:
# Accessing elements in pandas DataFrames
# A dictionary of equal-length lists: keys become columns, `index` labels the rows
data = {'Integers': [1, 2, 3], 'Float': [4.5, 8.2, 9.6]}
data2 = pd.DataFrame(data, index = ['Label 1', 'Label 2', 'Label 3'])
print(data2)

# A list of dictionaries: one row per dictionary; keys missing from a row become NaN
items = [{'Bikes': 20, 'Pants': 10, 'Watches': 38}, {'Watches': 48, 'Bikes': 15, 'Pants': 5, 'Book': 45}]
stores = pd.DataFrame(items, index = ['Store 1', 'Store 2'])
print(stores)

# Column selection with a list of labels returns a DataFrame
print(stores[['Bikes']])
print(stores[['Bikes', 'Watches']])
# Row selection by label goes through .loc
print(stores.loc[['Store 1']])
# Single cell: one .loc[row, column] lookup instead of chained [col][row] indexing
print(stores.loc['Store 1', 'Bikes'])

# Adding columns: from a list, and from arithmetic on existing columns
stores['Shirts'] = [20, 5]
print(stores)
stores['Suits'] = stores['Pants'] + stores['Shirts']
print(stores)

# Adding a row: DataFrame.append was removed in pandas 2.0, so the new
# one-row frame is attached with pd.concat. Columns the new row lacks are NaN.
new_items = {'Watches': 48, 'Pants': 15, 'Shirts': 5, 'Book': 45}
new_store = pd.DataFrame(new_items, index = ['Store 3'])
stores = pd.concat([stores, new_store])
print(stores)

# Assigning a slice aligns on the row index, so the skipped first row gets NaN
stores['New_Watches'] = stores['Watches'][1:]
print(stores)
# insert() places the new column at an explicit position (here: column 5)
stores.insert(5, 'Shoes', [8, 7, 9])
print(stores)

# Removing data: pop() removes one column in place, drop() returns a new frame
stores.pop('New_Watches')
print(stores)
stores = stores.drop(['Watches', 'Shoes'], axis = 1)
print()
print(stores)
stores = stores.drop(['Store 1', 'Store 2'], axis = 0)
print(stores)

# Renaming a column label and a row label
stores = stores.rename(columns = {'Bikes' : 'Hats'})
print(stores)
stores = stores.rename(index = {'Store 3' : 'Last Store'})
print(stores)

In [10]:
# Dealing with NaN
# Keys missing from a row's dictionary show up as NaN in the DataFrame
items = [{'bikes': 20, 'pants': 30, 'watches': 35, 'shirts': 15, 'shoes':8, 'suits':45},
{'watches': 10, 'glasses': 50, 'bikes': 15, 'pants':5, 'shirts': 2, 'shoes':5, 'suits':7},
{'bikes': 20, 'pants': 30, 'watches': 35, 'glasses': 4, 'shoes':10}]

store_items = pd.DataFrame(items, index = ['store 1', 'store 2', 'store 3'])

# Counting missing data
x = store_items.isnull()              # boolean mask, True where a value is NaN
y = store_items.isnull().sum()        # number of NaN values per column
z = store_items.isnull().sum().sum()  # total number of NaN values
a = store_items.count()               # number of non-NaN values per column
print(x,  y, z, a)
print()
print(store_items)

# Removing missing data: axis=0 drops rows containing NaN, axis=1 drops columns
b = store_items.dropna(axis = 0)
print()
print(b)
c = store_items.dropna(axis = 1)
print(c)

# Replacing missing data with a constant
d = store_items.fillna(0)
print(d)

# Forward / backward fill. fillna(method=...) is deprecated in recent pandas,
# so the dedicated ffill() / bfill() methods are used instead.
# axis=0 fills along each column, axis=1 fills along each row.
e = store_items.ffill(axis = 0)
print(e)
f = store_items.ffill(axis = 1)
print(f)
g = store_items.bfill(axis = 0)
print(g)
h = store_items.bfill(axis = 1)
print(h)

# Linear interpolation along each column (axis=0) and along each row (axis=1)
i = store_items.interpolate(method = 'linear', axis = 0)
print('i = ', i)
j = store_items.interpolate(method = 'linear', axis = 1)
print(j)

In [11]:
import pandas as pd
import numpy as np

# Use the full option name: the bare pattern 'precision' matches more than one
# option on current pandas and is rejected as ambiguous.
pd.set_option('display.precision', 1)

books = pd.Series(data = ['Great Expectations', 'Of Mice and Men', 'Romeo and Juliet', 'The Time Machine', 'Alice in Wonderland' ])
authors = pd.Series(data = ['Charles Dickens', 'John Steinbeck', 'William Shakespeare', ' H. G. Wells', 'Lewis Carroll' ])

# The rating Series are shorter than `books`; rows they do not cover become
# NaN once everything is aligned on the default integer index.
user_1 = pd.Series(data = [3.2, np.nan ,2.5])
user_2 = pd.Series(data = [5., 1.3, 4.0, 3.8])
user_3 = pd.Series(data = [2.0, 2.3, np.nan, 4])
user_4 = pd.Series(data = [4, 3.5, 4, 5, 4.2])

dat = { 'Book Title': books,
        'Author': authors,
        'User 1': user_1,
        'User 2': user_2,
        'User 3': user_3,
        'User 4': user_4}
book_ratings = pd.DataFrame(dat)
print(book_ratings)
print()

# Fill each user's missing ratings with that user's mean rating.
# numeric_only=True is required because the frame also holds the string
# columns 'Book Title' and 'Author', which cannot be averaged.
book_ratings_filled = book_ratings.fillna(book_ratings.mean(numeric_only = True))
print(book_ratings_filled)

# Titles of the books that received a rating of exactly 5 from any user,
# selected with one .loc[row_mask, column] lookup.
has_top_rating = (book_ratings == 5).any(axis = 1)
best_rated = book_ratings.loc[has_top_rating, 'Book Title'].values
print(best_rated)

In [12]:
# Loading data into a pandas DataFrame
import pandas as pd

# pd.read_csv parses a CSV file into a DataFrame. For an Excel workbook the
# analogous call would be: data = pd.read_excel('data.xlsx')
# The relative path is resolved against the current working directory.
data = pd.read_csv(filepath_or_buffer='data.csv')
print(type(data))
print(data)

In [13]:
# Viewing the DataFrame: overall dimensions first, then both ends of the table
preview_rows = 10

print(data.shape)   # (number of rows, number of columns)
print(data.size)    # total number of cells
print(data.head(preview_rows))
print()
print(data.tail(preview_rows))

In [149]:
# Checking for null values
null_mask = data.isnull()          # True wherever a value is missing
nulls_per_column = null_mask.sum()

print(null_mask.any())             # does each column contain any missing value?
print(nulls_per_column)            # missing values per column
print(nulls_per_column.sum())      # missing values in the whole frame
print(data.count())                # non-missing values per column

In [155]:
# Basic statistics
# describe() summarises the numeric columns of a frame, or a single column
print(data.describe())
print(data['Age'].describe())
# NOTE(review): on pandas >= 2.0, min()/max() may raise TypeError if a text
# column also contains NaN -- pass numeric_only=True if that happens; confirm
# against the actual contents of data.csv.
print(data.min())
print(data.max())
# mean() and corr() are only defined for numeric data. `data` presumably holds
# text columns as well (e.g. 'Sex', which is used as a grouping key later), and
# pandas >= 2.0 no longer drops those silently, so restrict both explicitly.
print(data.mean(numeric_only = True))
print(data.corr(numeric_only = True))


In [169]:
# Sum of the 'Survived' column within each group, for several grouping keys.
# A blank line separates the first two tables from the 'Sex'-based ones.
for group_keys in (['Pclass'], ['Age']):
    print(data.groupby(group_keys)['Survived'].sum())
print()
for group_keys in (['Sex'], ['Sex', 'Age']):
    print(data.groupby(group_keys)['Survived'].sum())
