## Pandas

In [1]:
!pip install pandas

Collecting pandas
  Downloading pandas-1.2.4-cp39-cp39-win_amd64.whl (9.3 MB)
Collecting pytz>=2017.3
  Downloading pytz-2021.1-py2.py3-none-any.whl (510 kB)
Installing collected packages: pytz, pandas
Successfully installed pandas-1.2.4 pytz-2021.1


In [2]:
import pandas as pd
import numpy as np

In [3]:
# Pandas is a library for working with tabular data through DataFrames; among other things, it can create and read CSV files.

In [4]:
# Sample data: three random integer arrays of length 5, one per column.
# It is later turned into a DataFrame and written out as a CSV (comma-separated) file.
actor = dict(
    Popularity=np.random.randint(1, 11, 5),
    Income=np.random.randint(10000, 1000000, 5),
    projects=np.random.randint(5, 50, 5),
)

print(actor)

{'Popularity': array([ 7, 10, 10,  1,  6]), 'Income': array([626307, 680674,  68870, 213381, 543328]), 'projects': array([35, 18, 33, 39,  5])}


In [5]:
# Build a DataFrame from the dict: each key becomes a column, each array its values
df = pd.DataFrame(data=actor)

In [6]:
print(df)  # Plain-text dump of the whole frame

   Popularity  Income  projects
0           7  626307        35
1          10  680674        18
2          10   68870        33
3           1  213381        39
4           6  543328         5


In [7]:
# Only the last expression of a cell is rendered, so this first call displays nothing here
df.head()

# First three rows of the DataFrame, shown as a table
df.head(3)

Unnamed: 0,Popularity,Income,projects
0,7,626307,35
1,10,680674,18
2,10,68870,33


In [8]:
df.columns  # Index holding the DataFrame's column labels

Index(['Popularity', 'Income', 'projects'], dtype='object')

In [9]:
# Write the DataFrame to the file actors_data.csv (relative path).
# The row index is written too and comes back as an "Unnamed: 0" column when the file is read.
df.to_csv("actors_data.csv")

### Reading From CSV

In [15]:
data = pd.read_csv("actors_data.csv")  # Load the CSV file into a DataFrame

In [11]:
print(data)  # Plain-text view of the frame that was read from the CSV

   Unnamed: 0  Popularity  Income  projects
0           0           7  626307        35
1           1          10  680674        18
2           2          10   68870        33
3           3           1  213381        39
4           4           6  543328         5


In [12]:
# The index saved in the CSV shows up as an "Unnamed: 0" column.
# drop() hands back a new DataFrame without that column; `data` itself is left untouched
# because the result is not assigned to anything.
data.drop("Unnamed: 0", axis="columns")

Unnamed: 0,Popularity,Income,projects
0,7,626307,35
1,10,680674,18
2,10,68870,33
3,1,213381,39
4,6,543328,5


In [13]:
data.shape  # Still 4 columns: drop() returned a new frame and the result was never assigned back to `data`

(5, 4)

In [14]:
# Passing index=False stops to_csv from writing the row index as an extra unnamed column.
df.to_csv("actors_data.csv", index=False)

# Reload the rewritten file so `data` reflects it. Without this, a top-to-bottom run
# leaves `data` holding the earlier four-column read, and the following cells
# only show three columns if the read cell is re-run by hand.
data = pd.read_csv("actors_data.csv")

In [16]:
print(data)  # Plain-text view of the frame read back from the CSV

   Popularity  Income  projects
0           7  626307        35
1          10  680674        18
2          10   68870        33
3           1  213381        39
4           6  543328         5


In [17]:
data.shape  # (number of rows, number of columns)

(5, 3)

In [18]:
data.head()  # Leading rows of the frame, rendered as a table

Unnamed: 0,Popularity,Income,projects
0,7,626307,35
1,10,680674,18
2,10,68870,33
3,1,213381,39
4,6,543328,5


In [20]:
data.tail(2)  # Counterpart of head(): takes rows from the end, here the last two

Unnamed: 0,Popularity,Income,projects
3,1,213381,39
4,6,543328,5


In [21]:
data.describe()  # Per-column summary statistics: count, mean, std, min, quartiles and max

Unnamed: 0,Popularity,Income,projects
count,5.0,5.0,5.0
mean,6.8,426512.0,26.0
std,3.701351,269951.368514,14.177447
min,1.0,68870.0,5.0
25%,6.0,213381.0,18.0
50%,7.0,543328.0,33.0
75%,10.0,626307.0,35.0
max,10.0,680674.0,39.0


In [25]:
# Reading data by integer position with iloc

# Single row
df.iloc[2]  # Row at position 2, i.e. the third row; its values are labelled by column name

Popularity       10
Income        68870
projects         33
Name: 2, dtype: int32

In [28]:
# Row and column by position
# Single lookup: row 2, column 1. Not rendered, since only the cell's last expression is shown.
df.iloc[2, 1]
# Same cell in two steps: pick the row, then the position inside it.
# The second step uses .iloc explicitly instead of relying on Series[...] treating
# an integer key as a position on a string-labelled Series.
df.iloc[2].iloc[1]

68870

In [32]:
# When a column's position is not known, look it up from the column name
index = df.columns.get_loc("Income")
print(index)

print(df.iloc[1, index])

# Several columns at once: gather their positions in a list.
# `index` keeps this list afterwards; the slicing cell that follows reuses it.
index = [df.columns.get_loc(name) for name in ("Income", "Popularity")]
print(index)
print(df.iloc[1, index])

1
680674
[1, 0]
Income        680674
Popularity        10
Name: 1, dtype: int32


In [33]:
# Slicing by position: the first three rows, restricted to the column positions stored in `index`

df.iloc[:3,index]

Unnamed: 0,Income,Popularity
0,626307,7
1,680674,10
2,68870,10


In [35]:
# Sort by Popularity first and break ties on Income; both keys are in descending order.
# The sorted frame is only displayed, not assigned, so `data` keeps its original order.
data.sort_values(by=["Popularity","Income"],ascending=False)

Unnamed: 0,Popularity,Income,projects
1,10,680674,18
2,10,68870,33
0,7,626307,35
4,6,543328,5
3,1,213381,39


In [36]:
data_array = data.values  # NumPy array holding the values of the DataFrame (column labels are not part of it)
print(data_array)

[[     7 626307     35]
 [    10 680674     18]
 [    10  68870     33]
 [     1 213381     39]
 [     6 543328      5]]


In [37]:
# The DataFrame and the array taken from it have the same shape
print(data.shape, data_array.shape, sep="\n")

(5, 3)
(5, 3)


In [39]:
# Wrap the NumPy array in a fresh DataFrame, supplying new column labels.
# The number of labels has to match the number of columns in the array.
new_df = pd.DataFrame(
    data=data_array,
    columns=["Gpa", "Salary", "work"],
)

In [40]:
new_df  # Display the rebuilt frame with its new column labels

Unnamed: 0,Gpa,Salary,work
0,7,626307,35
1,10,680674,18
2,10,68870,33
3,1,213381,39
4,6,543328,5


In [41]:
new_df.to_csv("collegeTnP.csv",index=False)  # Save to collegeTnP.csv without writing the row index

In [42]:
tnp = pd.read_csv("collegeTnP.csv")  # Read the file written in the previous cell back into a DataFrame

tnp

Unnamed: 0,Gpa,Salary,work
0,7,626307,35
1,10,680674,18
2,10,68870,33
3,1,213381,39
4,6,543328,5
