# 31.08.23

# GETTING STARTED WITH PANDAS

## To get started with Pandas, both NumPy and Pandas need to be imported.

In [None]:
# importing libraries

import numpy as np
import pandas as pd

## In a nutshell, Pandas objects are advanced versions of NumPy structured arrays in which the rows and columns are identified with labels instead of simple integer indices.

## The basic data structures of Pandas are Series and DataFrame.

# PANDAS SERIES OBJECT

## A Series is a 1D labelled array.
## It supports different data types such as integer, float, string, etc.

## Syntax:
## pd.Series(data,index,dtype)
### data - list, dict, etc
### index - optional; custom index labels can be defined explicitly if required (otherwise a default integer index is used).
### dtype - dtype used in the series (optional parameter)

In [None]:
# Create a Series from a plain list of five integers.
# No index is passed, so pandas assigns the default integer labels 0..4.
# (pandas is already imported as `pd` in the imports cell at the top.)
series = pd.Series(data=[78, 83, 59, 92, 75])
series

0    78
1    83
2    59
3    92
4    75
dtype: int64

## The series object provides the values along with their index attributes.

## Series.values provides the values.

In [None]:
# The underlying data, without the index, as a NumPy array.
series.values

array([78, 83, 59, 92, 75])

## Series.index provides the index.

In [None]:
# The index labels; a RangeIndex here because no index was passed when the Series was created.
series.index

RangeIndex(start=0, stop=5, step=1)

## Accessing Data in Series: Using []

In [None]:
# Look up the element whose index label is 2 (labels are the default 0..4 here).
series[2]

59

## Slicing a Series:

In [None]:
# Integer slice: the end point is excluded, so only the elements at 1 and 2 are returned.
series[1:3]

1    83
2    59
dtype: int64

# CUSTOM INDEX IN SERIES

## By default, a Series gets an integer index.
## The custom index can also be defined.

### Generating a Pandas series with string type indices


In [None]:
# Same constructor as before, but with an explicit string index: each price
# is labelled by its car model instead of by a default integer.
data = pd.Series(
    data=[700000, 800000, 1600000, 1800000, 3000000],
    index=['Swift', 'Jazz', 'Civic', 'Altis', 'Gallardo'],
)
data

Swift        700000
Jazz         800000
Civic       1600000
Altis       1800000
Gallardo    3000000
dtype: int64

### Accessing Values:

In [None]:
# Label-based lookup using the custom string index.
data['Swift']

700000

In [None]:
# Slice by index labels: unlike an integer slice, both end points are included.
data['Jazz':'Gallardo']

Jazz         800000
Civic       1600000
Altis       1800000
Gallardo    3000000
dtype: int64

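### In this case, the output starts at Jazz and goes up to Gallardo (inclusive).
### This is the fundamental DIFFERENCE between implicit (positional) and explicit (label-based) indexing: a positional slice excludes its end point, whereas a label slice includes it.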

# SERIES AS A SPECIALIZED DICTIONARY

## Series can also be viewed as a specialized dictionary where the keys act as index and corresponding values act as values.

### Creating a Series out of the dictionary data structure:

In [None]:
# Car model -> price. When the dict is passed to pd.Series, its keys become
# the index labels and its values become the data.
car_price_dict = {
    'Swift': 700000,
    'Jazz': 800000,
    'Civic': 1600000,
    'Altis': 1800000,
    'Gallardo': 3000000,
}

car_price = pd.Series(data=car_price_dict)
car_price

Swift        700000
Jazz         800000
Civic       1600000
Altis       1800000
Gallardo    3000000
dtype: int64

# 01.09.23

# PANDAS DATAFRAME OBJECT

## A Series gives a useful way to view and manipulate 1D data. But when data is present in rows and columns, it becomes necessary to make use of the Pandas DataFrame object.
## A DataFrame is a collection of series where each series represents a column from a table.

### Creating two Series from two dictionaries - one containing car name and price and the other with car name and manufacturer.

In [11]:
# Build two Series from dicts that share the same keys (car names), so both
# Series end up with the same index labels.
# (pandas is already imported as `pd` in the imports cell at the top.)

# Car name -> price
car_price_dict = {'Swift': 700000,
                  'Jazz': 800000,
                  'Gallardo': 3000000}
car_price = pd.Series(car_price_dict)

# Car name -> manufacturer
car_man_dict = {'Swift': 'Maruti',
                'Jazz': 'Honda',
                'Gallardo': 'Lamborghini'}
car_man = pd.Series(car_man_dict)

# sep="\n" prints the heading and the Series on separate lines. Embedding the
# newline in the heading instead ("...:\n", obj) makes print() insert its
# default space separator after the newline, which shifts the first row of
# the Series one column to the right.
print("Car-Price Series:", car_price, sep="\n")
print("Car-Manufacturer Series:", car_man, sep="\n")

Car-Price Series:
 Swift        700000
Jazz         800000
Gallardo    3000000
dtype: int64
Car-Manufacturer Series:
 Swift            Maruti
Jazz              Honda
Gallardo    Lamborghini
dtype: object


## Creating a DataFrame Object using the Series Objects.

### Syntax:
> ### pd.DataFrame(data,index,columns)



> data: Can contain Series or list-like objects. If data is a dict, column order follows insertion-order.

> index: Index for the DataFrame that is created. By default, it'll be a RangeIndex (0, 1, 2, ..., n-1) if no explicit index is provided.

> columns: Column labels for the DataFrame. If data already contains column labels, those are used; otherwise it defaults to a RangeIndex (0, 1, 2, ..., n-1).

















In [12]:
# Each dict key becomes a column name; the car names shared by both Series
# become the row index of the resulting DataFrame.
cars = pd.DataFrame(
    data={
        'Price': car_price,
        'Manufacturer': car_man,
    }
)
cars

Unnamed: 0,Price,Manufacturer
Swift,700000,Maruti
Jazz,800000,Honda
Gallardo,3000000,Lamborghini


### The car names act as the indices and 'Price' & 'Manufacturer' act as the columns/features of this small dataset.

## Accessing Individual Features

In [13]:
# Selecting a single column by name returns it as a Series named after the column.
cars['Price']

Swift        700000
Jazz         800000
Gallardo    3000000
Name: Price, dtype: int64

In [14]:
# Same column selection for the string-valued column (dtype: object).
cars['Manufacturer']

Swift            Maruti
Jazz              Honda
Gallardo    Lamborghini
Name: Manufacturer, dtype: object

# WAYS TO CREATE A DATAFRAME

## 1. From a Single Series Object

### A DataFrame is a collection of Series objects, and a single-column DataFrame can be constructed from a single Series.

In [15]:
# Build a Series from a dict: keys become index labels, values become data.
car_price_dict = {'Civic': 1600000,
                  'Altis': 1800000,
                  'Gallardo': 3000000}
car_price_sr = pd.Series(car_price_dict)
print(car_price_sr)

# Create a single-column DataFrame from car_price_sr.
# The dict key names the column explicitly. Passing the Series positionally
# with columns=['Car Price'] only works while the Series is unnamed: there,
# `columns` selects by the Series' name instead of naming the column, so a
# named Series would silently lose its data.
car_price_df = pd.DataFrame({'Car Price': car_price_sr})
print(car_price_df)

Civic       1600000
Altis       1800000
Gallardo    3000000
dtype: int64
          Car Price
Civic       1600000
Altis       1800000
Gallardo    3000000


## 2. From a List of Dictionaries

In [17]:
# One dict per person: every dict becomes a row and its keys become the columns.
# (Despite its name, `name_age_dict` is a list of dicts.)
name_age_dict = [
    {'Name': 'Subbu', 'Age': 34},
    {'Name': 'Abdul', 'Age': 23},
    {'Name': 'John', 'Age': 31},
]
name_age_df = pd.DataFrame(data=name_age_dict)
name_age_df

Unnamed: 0,Name,Age
0,Subbu,34
1,Abdul,23
2,John,31


In [19]:
# The two dicts share no keys, so each row only has values in its own
# columns; every other cell is filled with NaN. `index` labels the rows.
pd.DataFrame(
    data=[
        {'Subbu': 23, 'Abdul': 54},
        {'Ray': 45, 'Jax': 31},
    ],
    index=['Maths', 'Physics'],
)

Unnamed: 0,Subbu,Abdul,Ray,Jax
Maths,23.0,54.0,,
Physics,,,45.0,31.0


#### Here, each element in the list is taken as a row.
#### NaN represents missing values.

## 3. From a Dictionary of Series Objects

### A DataFrame can be constructed from a dict of Series objects.

In [21]:
# Re-create the two Series (car name -> price, car name -> manufacturer) that
# are combined into a DataFrame in the next cell.

# Car name -> price
car_price_dict = {'Swift': 700000,
                  'Jazz': 800000,
                  'Gallardo': 3000000}
car_price = pd.Series(car_price_dict)

# Car name -> manufacturer
car_man_dict = {'Swift': 'Maruti',
                'Jazz': 'Honda',
                'Gallardo': 'Lamborghini'}
car_man = pd.Series(car_man_dict)

# sep="\n" prints the heading and the Series on separate lines. Embedding the
# newline in the heading instead ("...:\n", obj) makes print() insert its
# default space separator after the newline, which shifts the first row of
# the Series one column to the right.
print("Car-Price Series:", car_price, sep="\n")
print("Car-Manufacturer Series:", car_man, sep="\n")

Car-Price Series:
 Swift        700000
Jazz         800000
Gallardo    3000000
dtype: int64
Car-Manufacturer Series:
 Swift            Maruti
Jazz              Honda
Gallardo    Lamborghini
dtype: object


In [23]:
# Dict of Series -> DataFrame: the dict keys name the columns and the car
# names from the Series index label the rows.
car_price_man = pd.DataFrame(
    data={
        'Price': car_price,
        'Manufacturer': car_man,
    }
)
car_price_man

Unnamed: 0,Price,Manufacturer
Swift,700000,Maruti
Jazz,800000,Honda
Gallardo,3000000,Lamborghini


## 4. From an Existing File

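### In most real-world scenarios, the data is stored in files of different formats such as csv, xlsx, json, etc. Pandas provides reader functions such as `pd.read_csv`, `pd.read_excel` and `pd.read_json` that load these files directly into a DataFrame.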


## The axis Keyword

### It's one of the most important parameters used while performing operations on DataFrames.
### It takes two values: 0 and 1


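*   axis=0 (or 'index') refers to the row axis: e.g. `drop(..., axis=0)` removes rows, and `sum(axis=0)` aggregates down the rows, giving one result per column.
*   axis=1 (or 'columns') refers to the column axis: e.g. `drop(..., axis=1)` removes columns, and `sum(axis=1)` aggregates across the columns, giving one result per row.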



