In [1]:
import pandas as pd
import numpy as np

In [2]:
# Create a series
series = pd.Series([1,2,3,4,5])
series

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [3]:
myDict = {
    'Name': ['Ehab','Ahmed','Ali','Ziad','Mo','Loay'],
    'Age': [23,25,22,None,23,23],
    'City': ['October','Kabir','Vegas','Ab','cD','23Oct']
}

df = pd.DataFrame(myDict)
print(df)
df.info()

    Name   Age     City
0   Ehab  23.0  October
1  Ahmed  25.0    Kabir
2    Ali  22.0    Vegas
3   Ziad   NaN       Ab
4     Mo  23.0       cD
5   Loay  23.0    23Oct
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Name    6 non-null      object 
 1   Age     5 non-null      float64
 2   City    6 non-null      object 
dtypes: float64(1), object(2)
memory usage: 272.0+ bytes


In [4]:
df.head()  # See first 5 rows by default
df.head(10)  # See 10 rows

df.tail()   # See last 5 rows by default
df.tail(10)

Unnamed: 0,Name,Age,City
0,Ehab,23.0,October
1,Ahmed,25.0,Kabir
2,Ali,22.0,Vegas
3,Ziad,,Ab
4,Mo,23.0,cD
5,Loay,23.0,23Oct


In [5]:
df.describe()   # info about this dataframe

Unnamed: 0,Age
count,5.0
mean,23.2
std,1.095445
min,22.0
25%,23.0
50%,23.0
75%,23.0
max,25.0


In [6]:
df.describe(include='all')   # include all descriptions not just default

Unnamed: 0,Name,Age,City
count,6,5.0,6
unique,6,,6
top,Ehab,,October
freq,1,,1
mean,,23.2,
std,,1.095445,
min,,22.0,
25%,,23.0,
50%,,23.0,
75%,,23.0,


In [7]:
df['Name']

0     Ehab
1    Ahmed
2      Ali
3     Ziad
4       Mo
5     Loay
Name: Name, dtype: object

In [8]:
# Condition on data frames
df[df['Age'] > 22]

Unnamed: 0,Name,Age,City
0,Ehab,23.0,October
1,Ahmed,25.0,Kabir
4,Mo,23.0,cD
5,Loay,23.0,23Oct


In [9]:
df[df['City'] == 'October']

Unnamed: 0,Name,Age,City
0,Ehab,23.0,October


In [10]:
df.loc[0:2,['Name','Age']]

Unnamed: 0,Name,Age
0,Ehab,23.0
1,Ahmed,25.0
2,Ali,22.0


In [11]:
df["Age"].mean()

np.float64(23.2)

In [12]:
df["Age"].max()

np.float64(25.0)

In [13]:
# Add new Feature (Column)
df['Salary']=[200,500,57,105,353,535]
df

Unnamed: 0,Name,Age,City,Salary
0,Ehab,23.0,October,200
1,Ahmed,25.0,Kabir,500
2,Ali,22.0,Vegas,57
3,Ziad,,Ab,105
4,Mo,23.0,cD,353
5,Loay,23.0,23Oct,535


In [14]:
# Updating Feature
df['Age'] += 1
df

Unnamed: 0,Name,Age,City,Salary
0,Ehab,24.0,October,200
1,Ahmed,26.0,Kabir,500
2,Ali,23.0,Vegas,57
3,Ziad,,Ab,105
4,Mo,24.0,cD,353
5,Loay,24.0,23Oct,535


In [15]:
# Drop a feature (1 -> Coloumn, 0 -> Row)
df.drop('Salary',axis=1)

Unnamed: 0,Name,Age,City
0,Ehab,24.0,October
1,Ahmed,26.0,Kabir
2,Ali,23.0,Vegas
3,Ziad,,Ab
4,Mo,24.0,cD
5,Loay,24.0,23Oct


In [16]:
# Drop a feature permentaly "add inplace=True"
df.drop('Salary',axis=1)
df

Unnamed: 0,Name,Age,City,Salary
0,Ehab,24.0,October,200
1,Ahmed,26.0,Kabir,500
2,Ali,23.0,Vegas,57
3,Ziad,,Ab,105
4,Mo,24.0,cD,353
5,Loay,24.0,23Oct,535


In [17]:
# Drop a row
df.drop(3,axis=0)
df

Unnamed: 0,Name,Age,City,Salary
0,Ehab,24.0,October,200
1,Ahmed,26.0,Kabir,500
2,Ali,23.0,Vegas,57
3,Ziad,,Ab,105
4,Mo,24.0,cD,353
5,Loay,24.0,23Oct,535


In [18]:
df.reset_index()    # After deleting rows - Reseting the rows

Unnamed: 0,index,Name,Age,City,Salary
0,0,Ehab,24.0,October,200
1,1,Ahmed,26.0,Kabir,500
2,2,Ali,23.0,Vegas,57
3,3,Ziad,,Ab,105
4,4,Mo,24.0,cD,353
5,5,Loay,24.0,23Oct,535


In [19]:
df.isnull() # Return True in cell that has null

Unnamed: 0,Name,Age,City,Salary
0,False,False,False,False
1,False,False,False,False
2,False,False,False,False
3,False,True,False,False
4,False,False,False,False
5,False,False,False,False


In [20]:
df.dropna() # Drop Null Rows

Unnamed: 0,Name,Age,City,Salary
0,Ehab,24.0,October,200
1,Ahmed,26.0,Kabir,500
2,Ali,23.0,Vegas,57
4,Mo,24.0,cD,353
5,Loay,24.0,23Oct,535


In [21]:
df['Age'].dropna() # Drop Null Rows

0    24.0
1    26.0
2    23.0
4    24.0
5    24.0
Name: Age, dtype: float64

In [22]:
df['Age'].fillna(df['Age'].mode()[0], inplace=True)   # Fill Null - Take first one if equal

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Age'].fillna(df['Age'].mode()[0], inplace=True)   # Fill Null - Take first one if equal


In [23]:
x=df.groupby('City')['Age'].mean()  # greoup by city
x

City
23Oct      24.0
Ab         24.0
Kabir      26.0
October    24.0
Vegas      23.0
cD         24.0
Name: Age, dtype: float64

In [24]:
def doubleSalary(x):
    return x*2

In [28]:
df['DoubleSalary'] = df['Salary'].apply(doubleSalary)
df

Unnamed: 0,Name,Age,City,Salary,DoubleSalary
0,Ehab,24.0,October,200,400
1,Ahmed,26.0,Kabir,500,1000
2,Ali,23.0,Vegas,57,114
3,Ziad,24.0,Ab,105,210
4,Mo,24.0,cD,353,706
5,Loay,24.0,23Oct,535,1070
