# Pandas 

In [3]:
import pandas as pd

## Creating a DataFrame

#### Creating a DataFrame from a Dictionary

In [5]:
# Build a DataFrame from a dictionary: every key becomes a column label and
# the list stored under it supplies that column's values, one per row.
data = {
    'name': ['Alice', 'Bob', 'Charlie'],
    'age': [25, 30, 35],
}
df = pd.DataFrame(data)

In [6]:
# Confirm that pd.DataFrame(...) produced a DataFrame object.
print(type(df))

<class 'pandas.core.frame.DataFrame'>


In [7]:
df.columns  # the column labels are the dictionary keys

Index(['name', 'age'], dtype='object')

#### Creating a DataFrame from a List of Lists

In [14]:
# Build a DataFrame from a list of lists: each inner list is one row.
# No column names are passed here, so the columns get default integer labels.
data = [
    ['Alice', 25],
    ['Bob', 30],
    ['Charlie', 35],
]
df = pd.DataFrame(data)

In [15]:
# The result is still a DataFrame even though no column names were supplied.
print(type(df))

<class 'pandas.core.frame.DataFrame'>


In [16]:
df.columns  # no names were given, so the labels are a default integer RangeIndex

RangeIndex(start=0, stop=2, step=1)

In [21]:
df.columns = ['name','age']  # column labels can also be assigned after the frame has been created

In [22]:
df.columns  # the integer labels have been replaced by the assigned names

Index(['name', 'age'], dtype='object')

In [23]:
# Column labels can also be supplied up front through the `columns` argument.
df1 = pd.DataFrame(data, columns = ['name','age'])

In [24]:
df1.columns  # labels that were set at construction time

Index(['name', 'age'], dtype='object')

## Accessing Data

In [25]:
df['name']  # selecting one column by its label returns a Series

0      Alice
1        Bob
2    Charlie
Name: name, dtype: object

In [26]:
type(df['name'])  # a single selected column is a pandas Series, not a DataFrame

pandas.core.series.Series

In [30]:
# Indexing with a list of labels selects several columns and returns a DataFrame.
list_col = ['name','age']
df[list_col]  # same as df[['name','age']] -- note the double brackets; df['name','age'] would look up a single tuple key instead

Unnamed: 0,name,age
0,Alice,25
1,Bob,30
2,Charlie,35


In [27]:
df[['name','age']]  # the list of labels written inline: outer brackets index, inner brackets form the list

Unnamed: 0,name,age
0,Alice,25
1,Bob,30
2,Charlie,35


### Condition-based filtering

In [41]:
# The comparison on its own only produces a boolean Series (True/False per row);
# it does not select any rows yet.
df['age'] >= 30

0    False
1     True
2     True
Name: age, dtype: bool

In [40]:
df[df['age'] >= 30]  # indexing df with the boolean Series keeps only the rows where it is True
   


Unnamed: 0,name,age
1,Bob,30
2,Charlie,35


In [42]:
# Two conditions combined element-wise with &. Nothing is returned here: Alice is the
# only name starting with 'A', and her age (25) is not greater than 25.
df[df['name'].str.startswith('A') & (df['age'] > 25)]

Unnamed: 0,name,age


In [43]:
# With >= the age condition also holds for Alice (25), so both conditions are True
# for her row and it is returned.
df[df['name'].str.startswith('A') & (df['age'] >= 25)]

Unnamed: 0,name,age
0,Alice,25


In [45]:
# isna() on its own returns a boolean Series (True where the value is missing);
# to fetch the matching rows it has to be used as a mask inside df[...].
df['age'].isna()

0    False
1    False
2    False
Name: age, dtype: bool

In [46]:
df[df['age'].isna()]  # rows whose 'age' is missing -- there are none here, so the result is empty

Unnamed: 0,name,age


#### Selecting data with the built-in indexers (`loc`, `iloc`, `at`, `iat`)

In [47]:
df.loc[0, 'name']  # .loc selects by label: row label 0, column 'name'

'Alice'

In [48]:
df.loc[0:1,['name','age']]  # label-based slicing with .loc includes the end label, so 0:1 returns rows 0 and 1

Unnamed: 0,name,age
0,Alice,25
1,Bob,30


In [50]:
df.iloc[0,0]  # .iloc selects by integer position: first row, first column

'Alice'

In [51]:
df.iloc[0:1,[0,1]]  # position-based slicing with .iloc excludes the stop position, so 0:1 returns only row 0

Unnamed: 0,name,age
0,Alice,25


In [52]:
df.at[0,'name']  # .at reads a single value by row label and column label

'Alice'

In [56]:
df.iat[0,0]  # .iat reads a single value by integer row and column position

'Alice'

### Reading and Writing data

In [64]:
# Load the CSV through a relative path (resolved against the current working directory)
# rather than an absolute, machine-specific one, so the notebook stays portable.
# Note: this rebinds `df`, replacing the in-memory frame used in the cells above.
df = pd.read_csv('Data/data.csv')

In [65]:
df  # display the frame as loaded from the CSV

Unnamed: 0,name,age
0,Alice,25
1,Bob,30


In [66]:
# Row label 2 does not exist yet, so assigning through .loc enlarges the frame with a new row.
# NOTE(review): the displayed 'age' values change from 25 to 25.0 after this cell --
# presumably the new row briefly has a missing age, which upcasts the column to float; confirm.
df.loc[2,'name'] = 'charlie'
df.loc[2, 'age']= 34

In [67]:
df  # the new row is present, and 'age' is now displayed as float

Unnamed: 0,name,age
0,Alice,25.0
1,Bob,30.0
2,charlie,34.0


In [68]:
# index=False keeps the row index out of the written file.
# NOTE(review): this overwrites the same file that was read earlier in the notebook, so a
# fresh re-run will load three rows instead of two -- consider writing to a separate output file.
df.to_csv('Data/data.csv', index = False)

### Data Exploration

In [69]:
df.head()  # first 5 rows by default; this frame has only 3, so all of them are shown

Unnamed: 0,name,age
0,Alice,25.0
1,Bob,30.0
2,charlie,34.0


In [71]:
df.head(2)  # pass the number of leading rows wanted as the argument

Unnamed: 0,name,age
0,Alice,25.0
1,Bob,30.0


In [72]:
df.tail()  # last 5 rows by default; this frame has only 3, so all of them are shown

Unnamed: 0,name,age
0,Alice,25.0
1,Bob,30.0
2,charlie,34.0


In [73]:
df.tail(2)  # last 2 rows only

Unnamed: 0,name,age
1,Bob,30.0
2,charlie,34.0


In [75]:
df.info()  # prints the index range, per-column non-null counts and dtypes, and memory usage

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   name    3 non-null      object 
 1   age     3 non-null      float64
dtypes: float64(1), object(1)
memory usage: 180.0+ bytes


In [77]:
df.describe()  # summary statistics; by default only numeric columns are included, so 'name' is absent

Unnamed: 0,age
count,3.0
mean,29.666667
std,4.50925
min,25.0
25%,27.5
50%,30.0
75%,32.0
max,34.0


In [78]:
# On a DataFrame, value_counts() counts how many times each distinct row
# (combination of column values) occurs -- every row here is unique, so each count is 1.
df.value_counts()

name     age 
Alice    25.0    1
Bob      30.0    1
charlie  34.0    1
Name: count, dtype: int64

In [79]:
df['age'].nunique()  # number of distinct values in the 'age' column

3

In [81]:
df.shape  # (number of rows, number of columns)

(3, 2)

In [82]:
df.isnull()  # boolean frame of the same shape: True where a value is missing, False otherwise

Unnamed: 0,name,age
0,False,False
1,False,False
2,False,False


In [84]:
# Indexing with a boolean DataFrame does not filter rows: the shape is kept and every cell
# where the mask is False is blanked out. Nothing is null here, so every cell shows as NaN.
# To list the rows that contain a missing value, use df[df.isnull().any(axis=1)] instead.
df[df.isnull()] 

Unnamed: 0,name,age
0,,
1,,
2,,


In [85]:
df.isnull().sum()  # number of missing values in each column

name    0
age     0
dtype: int64