<a href="https://colab.research.google.com/github/Battula-Shilpa/-Python/blob/main/7_Pandas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

- Pandas is an open-source Python library for data analysis and data manipulation.
- Pandas provides two core data structures:

  - **Series:** A one-dimensional labeled (indexed) array capable of holding any data type.

  - **DataFrame:** A 2D labeled data structure, similar to a spreadsheet or SQL table, where data is
organized in rows and columns.

In [None]:
import pandas as pd

# Series

In [None]:
# Creating an empty Series object.
# The dtype is passed explicitly: older pandas releases defaulted an empty
# Series to float64 and emitted a warning asking for an explicit dtype.
a = pd.Series(dtype=object)
print(a)

Series([], dtype: object)


In [None]:
# Build a Series from a plain Python list; pandas supplies a default 0..n-1 index
a = list(range(1, 5))
b = pd.Series(data=a)
print(b)

0    1
1    2
2    3
3    4
dtype: int64


In [None]:
# Indexing and accessing values: look up the elements labelled 0 and 2, one per line
print(b[0], b[2], sep="\n")

1
3


In [None]:
# Accessing Series elements with slicing: positions 1 and 2 (the stop position 3 is excluded)
b[1:3]

Unnamed: 0,0
1,2
2,3


In [None]:
# Integer-location (position-based) selection with iloc: the first three elements
b.iloc[:3]

Unnamed: 0,0
0,1
1,2
2,3


In [None]:
# Filter the Series with a boolean mask: keep only the values smaller than 4
print(b.loc[b < 4])

0    1
1    2
2    3
dtype: int64


In [None]:
# Creating a Series whose index is a list of custom labels instead of 0..n-1
import pandas as pd

labels = ["A", "B", "C", "D", "E"]
data = [10, 20, 30, 40, 50]
a = pd.Series(data=data, index=labels)
print(a)

A    10
B    20
C    30
D    40
E    50
dtype: int64


In [None]:
# Inserting a new value: assigning to a label that is not in the index adds it
a.loc['F'] = 60
a

Unnamed: 0,0
A,10
B,20
C,30
D,40
E,50
F,60


In [None]:
# Updating an existing value: assigning to the label 'A' sets its value to 12
a.loc['A'] = 12
a

Unnamed: 0,0
B,20
C,30
D,40
E,50
F,60
A,12


In [None]:
# Delete a value using del: removes the element labelled 'A' from the Series in place.
# NOTE(review): this cell is not re-runnable as-is — presumably a second run fails
# because 'A' no longer exists; confirm before running it twice.
del a['A']
print("After deleting")
a

After deleting


Unnamed: 0,0
B,20
C,30
D,40
E,50
F,60


In [None]:
# Checking for null values: isnull() returns a boolean Series, True where a value is missing
a.isnull()

Unnamed: 0,0
B,False
C,False
D,False
E,False
F,False


In [None]:
# Checking for NaN values: isna() reports missing entries element-wise as booleans
a.isna()

Unnamed: 0,0
B,False
C,False
D,False
E,False
F,False


# DataFrame

In [None]:
import numpy as np

In [None]:
  #Creating a dataframe
  data = {
      'Name' : ['Shilpa','Srini','Chinni',None],
      'Age' : [21,20,19,np.nan],
      'City' : ['Hyderabad','Chennai','Pune',None]
  }
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,City
0,Shilpa,21.0,Hyderabad
1,Srini,20.0,Chennai
2,Chinni,19.0,Pune
3,,,


In [None]:
# Select the first row by integer position; the row is returned as a Series
print(df.iloc[0])

Name       Shilpa
Age            21
City    Hyderabad
Name: 0, dtype: object


In [None]:
# Positional row slice: rows 0 and 1 (the stop position 2 is excluded)
df.iloc[0:2]

Unnamed: 0,Name,Age,City
0,Shilpa,21,Hyderabad
1,Srini,20,Chennai


In [None]:
# DataFrame attributes, one per line: dimensions, column labels, row index, per-column dtypes
print(df.shape, df.columns, df.index, df.dtypes, sep="\n")

(4, 3)
Index(['Name', 'Age', 'City'], dtype='object')
RangeIndex(start=0, stop=4, step=1)
Name    object
Age      int64
City    object
dtype: object


In [None]:
# Quick inspection: first rows, last rows, summary statistics, then structure.
print(df.head())
print(df.tail())
print(df.describe())
# info() writes its report directly to stdout and returns None, so wrapping it
# in print() would add a stray "None" line after the report.
df.info()

     Name  Age       City
0  Shilpa   21  Hyderabad
1   Srini   20    Chennai
2  Chinni   19       Pune
3            0           
     Name  Age       City
0  Shilpa   21  Hyderabad
1   Srini   20    Chennai
2  Chinni   19       Pune
3            0           
             Age
count   4.000000
mean   15.000000
std    10.033278
min     0.000000
25%    14.250000
50%    19.500000
75%    20.250000
max    21.000000
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    4 non-null      object
 1   Age     4 non-null      int64 
 2   City    4 non-null      object
dtypes: int64(1), object(2)
memory usage: 228.0+ bytes
None


In [None]:
# Rename the 'Age' column to 'Years'; the result is stored under a new name
df_rename = df.rename({'Age': 'Years'}, axis='columns')
df_rename

Unnamed: 0,Name,Years,City
0,Shilpa,21.0,Hyderabad
1,Srini,20.0,Chennai
2,Chinni,19.0,Pune
3,,,


In [None]:
# Rename a row label: the index entry 0 becomes the string '00'
df_index = df.rename({0: '00'}, axis='index')
df_index

Unnamed: 0,Name,Age,City
0,Shilpa,21.0,Hyderabad
1,Srini,20.0,Chennai
2,Chinni,19.0,Pune
3,,,


In [None]:
# Handling missing or NaN values: display the frame first to see where the gaps are
df

Unnamed: 0,Name,Age,City
0,Shilpa,21.0,Hyderabad
1,Srini,20.0,Chennai
2,Chinni,19.0,Pune
3,,,


In [None]:
# Flag missing cells: the result has the same layout as df, with True where a value is missing
df.isna()

Unnamed: 0,Name,Age,City
0,False,False,False
1,False,False,False
2,False,False,False
3,True,True,True


In [None]:
# Filling NaN values: show a version of the frame with every missing cell replaced by 0
# NOTE(review): the text columns Name and City receive 0 as well — confirm that is intended.
df.fillna(0)

Unnamed: 0,Name,Age,City
0,Shilpa,21.0,Hyderabad
1,Srini,20.0,Chennai
2,Chinni,19.0,Pune
3,0,0.0,0


In [None]:
# Dropping rows with NaN values: any row containing a missing cell is removed
df.dropna(axis='index')

Unnamed: 0,Name,Age,City
0,Shilpa,21.0,Hyderabad
1,Srini,20.0,Chennai
2,Chinni,19.0,Pune


# Example

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Optional (Google Colab only): upload insurance.csv from the local machine.
# Kept as real comments rather than a triple-quoted string, because a bare
# string literal is evaluated and echoed back as the cell's output.
# from google.colab import files
# uploaded = files.upload()

'from google.colab import files\nuploaded = files.upload()'

In [None]:
# Locate insurance.csv on a mounted Google Drive (Colab only):
# !find /content/drive/ -name "insurance.csv"


In [None]:
# Load the insurance dataset from a CSV file and preview its first five rows
df = pd.read_csv(filepath_or_buffer="insurance.csv")
print(df.head(n=5))

   age     sex     bmi  children smoker     region      charges
0   19  female  27.900         0    yes  southwest  16884.92400
1   18    male  33.770         1     no  southeast   1725.55230
2   28    male  33.000         3     no  southeast   4449.46200
3   33    male  22.705         0     no  northwest  21984.47061
4   32    male  28.880         0     no  northwest   3866.85520


In [None]:
# Rich (table) display of the first five rows
df.head()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [None]:
# Last five rows of the dataset
df.tail()

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
1333,50,male,30.97,3,no,northwest,10600.5483
1334,18,female,31.92,0,no,northeast,2205.9808
1335,18,female,36.85,0,no,southeast,1629.8335
1336,21,female,25.8,0,no,southwest,2007.945
1337,61,female,29.07,0,yes,northwest,29141.3603


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
df.describe()

Unnamed: 0,age,bmi,children,charges
count,1338.0,1338.0,1338.0,1338.0
mean,39.207025,30.663397,1.094918,13270.422265
std,14.04996,6.098187,1.205493,12110.011237
min,18.0,15.96,0.0,1121.8739
25%,27.0,26.29625,0.0,4740.28715
50%,39.0,30.4,1.0,9382.033
75%,51.0,34.69375,2.0,16639.912515
max,64.0,53.13,5.0,63770.42801


In [None]:
# Structure overview: column names, non-null counts, dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1338 entries, 0 to 1337
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1338 non-null   int64  
 1   sex       1338 non-null   object 
 2   bmi       1338 non-null   float64
 3   children  1338 non-null   int64  
 4   smoker    1338 non-null   object 
 5   region    1338 non-null   object 
 6   charges   1338 non-null   float64
dtypes: float64(2), int64(2), object(3)
memory usage: 73.3+ KB


In [None]:
# Dimensions of the frame as a (rows, columns) tuple
df.shape

(1338, 7)

In [None]:
# Data type of each column
df.dtypes

Unnamed: 0,0
age,int64
sex,object
bmi,float64
children,int64
smoker,object
region,object
charges,float64


In [None]:
# Select the row whose index label is 0 (loc is label-based)
print(df.loc[0])

age                19
sex            female
bmi              27.9
children            0
smoker            yes
region      southwest
charges     16884.924
Name: 0, dtype: object


In [None]:
# Label-based selection: rows labelled 0 through 1 (end label included), columns 'age' and 'bmi'
print(df.loc[0:1,['age','bmi']])

   age    bmi
0   19  27.90
1   18  33.77


In [None]:
# Position-based selection with iloc: the row at position 0 (a single row, not a slice)
print(df.iloc[0])

age                19
sex            female
bmi              27.9
children            0
smoker            yes
region      southwest
charges     16884.924
Name: 0, dtype: object


In [None]:
# Positional slicing: rows 0-6 and columns 0-3 (the stop positions 7 and 4 are excluded)
print(df.iloc[0:7,0:4])

   age     sex     bmi  children
0   19  female  27.900         0
1   18    male  33.770         1
2   28    male  33.000         3
3   33    male  22.705         0
4   32    male  28.880         0
5   31  female  25.740         0
6   46  female  33.440         1
