# Loading data into Pandas


In [2]:
import pandas as pd
df = pd.read_csv('pandas-master/pandas-master/pokemon_data.csv')
print(df.head(3))
print(df.tail(3))

   #       Name Type 1  Type 2  HP  Attack  Defense  Sp. Atk  Sp. Def  Speed  \
0  1  Bulbasaur  Grass  Poison  45      49       49       65       65     45   
1  2    Ivysaur  Grass  Poison  60      62       63       80       80     60   
2  3   Venusaur  Grass  Poison  80      82       83      100      100     80   

   Generation  Legendary  
0           1      False  
1           1      False  
2           1      False  
       #                 Name   Type 1 Type 2  HP  Attack  Defense  Sp. Atk  \
797  720  HoopaHoopa Confined  Psychic  Ghost  80     110       60      150   
798  720   HoopaHoopa Unbound  Psychic   Dark  80     160       60      170   
799  721            Volcanion     Fire  Water  80     110      120      130   

     Sp. Def  Speed  Generation  Legendary  
797      130     70           6       True  
798      130     80           6       True  
799       90     70           6       True  


# Declaring a Series 
use the .Series([array]) method with and [array as parameters]

In [3]:
s = pd.Series([12,-4,7,9])
print(s.values, "values of s data object")
print(s.index, "index values")


[12 -4  7  9] values of s data object
RangeIndex(start=0, stop=4, step=1) index values


In [4]:
# you can specify the index labels using the second parameter 
# be carefu no. of elements in the array = the index or else value error 
t = pd.Series([12,-4,7,9], index=['a','b', 'c', 'd'])
print(t.values, "values of t data object")
print(t.index, "index values")

[12 -4  7  9] values of t data object
Index(['a', 'b', 'c', 'd'], dtype='object') index values


## Defining Series from NumPy Arrays and Other Series 
You can define new Series using NumPy arrays or existing series 
Keep in mind that the values in the NumPy array or the original series are not copied but are passed by reference. Any change to the original results in dynamic downstream changes. 

In [5]:
import numpy as np
arr = np.array([1,2,3,4])
s3 = pd.Series(arr)
s3


0    1
1    2
2    3
3    4
dtype: int32

In [6]:
s4 = pd.Series(s)
s4

0    12
1    -4
2     7
3     9
dtype: int64

In [7]:
s3[2] = -2

In [8]:
s3

0    1
1    2
2   -2
3    4
dtype: int32

## Filtering values 
The methods and opertions applicable to numpy are extended to pandas

In [9]:
s[s > 8]

0    12
3     9
dtype: int64

## Operationas and Mathematical functions 

In [10]:
s3 * 1002 / 23 ** 2

0    1.89414
1    3.78828
2   -3.78828
3    7.57656
dtype: float64

In [11]:
np.log(s3)

  result = getattr(ufunc, method)(*inputs, **kwargs)


0    0.000000
1    0.693147
2         NaN
3    1.386294
dtype: float64

## Evaluating Values 

In [12]:
serd = pd.Series([1,0,2,1,2,3], index=['white', 'white', 'blue', 'green','green', 'yellow'])
serd

white     1
white     0
blue      2
green     1
green     2
yellow    3
dtype: int64

In [13]:
serd.unique()

array([1, 0, 2, 3], dtype=int64)

In [14]:
serd.value_counts()

1    2
2    2
0    1
3    1
dtype: int64

In [15]:
serd.isin([0,3])

white     False
white      True
blue      False
green     False
green     False
yellow     True
dtype: bool

In [16]:
serd[serd.isin([0,3])]

white     0
yellow    3
dtype: int64

# Series as Dictionary 
A pandas series is more like a dictionary **{key:value}** pair   

In [21]:
my_dict = {'red': 2000, 'blue':1000, 'yellow': 500, 'orange':1000}
myseries = pd.Series(my_dict)
myseries

red       2000
blue      1000
yellow     500
orange    1000
dtype: int64

In [23]:
mycolors = ['yellow', 'red', 'purple', 'pink']
myseries2 = pd.Series(my_dict, index=mycolors)
myseries2

yellow     500.0
red       2000.0
purple       NaN
pink         NaN
dtype: float64

In [24]:
dataset1 = {'red':2000, 'blue':300,'white':1700}
d1 = pd.Series(dataset1)
colors = ['green', 'red', 'white']
dataset2 = [1000, 1500, 9000]
d2 = pd.Series(dataset2,index=colors)

height_sum = d1 + d2 


In [25]:
print(height_sum)

blue         NaN
green        NaN
red       3500.0
white    10700.0
dtype: float64
