In [29]:
import pandas as pd
import numpy as np

# Pandas introduces two new data structures to Python: Series and DataFrame

# **Series**

## We can create a Series from a list

In [17]:
# A list mixing an int with strings: the values are stored with dtype=object
# and pandas assigns a default integer index starting at 0.
pd.Series([7, 'MS Dhoni', 'Cricket', 'Batsmen', 'Wicketkeeper'])

0               7
1        MS Dhoni
2         Cricket
3         Batsmen
4    Wicketkeeper
dtype: object

## The Series constructor can also convert a dictionary, using the dictionary's keys as the index

In [18]:
# Dict keys become the index labels; mixing ints and strings gives dtype=object.
pd.Series({
    'id': 7,
    'Name': 'MSDhoni',
    'Sport': 'Cricket',
    'Batsman': 1,
    'Wicketkeeper': 1,
    'Bowler': 0,
})

id                    7
Name            MSDhoni
Sport           Cricket
Batsman               1
Wicketkeeper          1
Bowler                0
dtype: object

In [19]:
# Values keyed by city name. Boston is deliberately None so that the
# resulting Series contains one missing entry.
d = {
    'Chicago': 1000,
    'New york': 1300,
    'Portland': 900,
    'San Francisco': 1100,
    'Austin': 450,
    'Boston': None,
}

# The dict keys become the index labels; the None shows up as NaN.
cities = pd.Series(d)
cities

Chicago          1000.0
New york         1300.0
Portland          900.0
San Francisco    1100.0
Austin            450.0
Boston              NaN
dtype: float64

## We can use the index to select specific items from the series

In [20]:
# Look up a single value by its index label.
cities['Chicago']

1000.0

In [22]:
# Passing a list of labels returns a Series restricted to those entries.
cities[['Chicago','Portland', 'Austin']]

Chicago     1000.0
Portland     900.0
Austin       450.0
dtype: float64

## We can also use boolean indexing for selection

In [23]:
# `cities > 1000` builds a boolean mask; indexing with it keeps only the
# entries where the mask is True. The comparison is strict, so Chicago
# (exactly 1000) is excluded, and the NaN for Boston compares as False.
cities[cities>1000]

New york         1300.0
San Francisco    1100.0
dtype: float64

## To check whether a label is in the Series index, we can use Python's idiomatic `in` operator

In [26]:
# `in` on a Series tests the index labels, not the stored values.
print('Seattle' in cities)
print('San Francisco' in cities)

False
True


## Mathematical operations can be applied using scalars and functions

In [27]:
# Arithmetic with a scalar is applied element-wise; the missing Boston
# entry stays NaN.
cities / 3

Chicago          333.333333
New york         433.333333
Portland         300.000000
San Francisco    366.666667
Austin           150.000000
Boston                  NaN
dtype: float64

In [31]:
# NumPy functions work element-wise on a Series and return a Series that
# keeps the original index labels.
np.square(cities)

Chicago          1000000.0
New york         1690000.0
Portland          810000.0
San Francisco    1210000.0
Austin            202500.0
Boston                 NaN
dtype: float64

### We can add two Series; values are aligned by index label, and labels present in only one Series produce NaN

In [32]:
# Addition aligns on index labels, not position: only 'New york' exists in
# both operands, so every other label in the union comes out as NaN.
left_cities = cities[['Chicago', 'New york', 'Portland']]
right_cities = cities[['Austin', 'New york']]
left_cities + right_cities

Austin         NaN
Chicago        NaN
New york    2600.0
Portland       NaN
dtype: float64

### Null Check

Checking for null values

In [34]:
# Boolean Series: True where the value is missing (only Boston here).
cities.isnull()

Chicago          False
New york         False
Portland         False
San Francisco    False
Austin           False
Boston            True
dtype: bool

Checking for not null values

In [35]:
# Boolean Series: True where a value is present (everything except Boston).
cities.notnull()

Chicago           True
New york          True
Portland          True
San Francisco     True
Austin            True
Boston           False
dtype: bool

# DataFrame

In [36]:
# Column-oriented input: each key is a column name and each list holds that
# column's values, one entry per (team, season) row.
data = {
    'year': [2010, 2011, 2012, 2011, 2012, 2010, 2011, 2012],
    'team': ['Bears', 'Bears', 'Bears', 'Packers', 'Packers', 'Lions', 'Lions', 'Lions'],
    'wins': [11, 8, 10, 15, 11, 6, 10, 4],
    'losses': [5, 8, 6, 1, 5, 10, 6, 12],
}

# Passing `columns` fixes the column order explicitly.
football = pd.DataFrame(
    data,
    columns=['year', 'team', 'wins', 'losses'],
)
football

Unnamed: 0,year,team,wins,losses
0,2010,Bears,11,5
1,2011,Bears,8,8
2,2012,Bears,10,6
3,2011,Packers,15,1
4,2012,Packers,11,5
5,2010,Lions,6,10
6,2011,Lions,10,6
7,2012,Lions,4,12


## Reading data from a CSV file

In [2]:
# Relative path: 'car data.csv' (note the space in the file name) must be in
# the notebook's current working directory for this cell to run.
vehicle_dataframe = pd.read_csv('car data.csv')

In [37]:
# Preview the first rows (head() defaults to 5) to sanity-check the load.
vehicle_dataframe.head()

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0
