# Pandas Documentation (My Version)

# Part - 1

In [8]:
import pandas
# Load the weather observations from the CSV file into a DataFrame and display it
df = pandas.read_csv("weather_data.csv")
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [9]:
# df.shape is a (row_count, column_count) pair; read each count by position
rows = df.shape[0]
columns = df.shape[1]
print(f"rows = {rows}")
print(f"columns = {columns}")

rows = 6
columns = 4


In [10]:
# Creating a DataFrame from a dictionary: each key becomes a column label and
# each list supplies that column's values.
# The dictionary is deliberately not named `dict`: that would shadow the
# built-in dict type for every later cell in the notebook.
grades = {
    "Names" : ["Manash","Dikesh","Raul"],
    "Grades" : ["A","B","A+"]
}
grades_df = pandas.DataFrame(grades)
grades_df

Unnamed: 0,Names,Grades
0,Manash,A
1,Dikesh,B
2,Raul,A+


In [11]:
# head() with no argument shows only the first few rows (5 of the 6 here)
df.head()

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain


In [12]:
# Passing a count limits head() to that many leading rows
df.head(2)

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny


In [13]:
# tail() with no argument shows only the last few rows (5 of the 6 here)
df.tail()
# Like head(), tail() also accepts a row count, e.g. df.tail(2)

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [14]:
# Row slicing by position: the start is inclusive and the stop is exclusive,
# so this returns the rows at positions 2, 3 and 4
df.iloc[2:5]

Unnamed: 0,day,temperature,windspeed,event
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain


In [15]:
# Column labels of the DataFrame
df.columns

Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')

In [16]:
# Select a single column by its label
df["day"] # attribute access (df.day) selects the same column

0    1/1/2017
1    1/2/2017
2    1/3/2017
3    1/4/2017
4    1/5/2017
5    1/6/2017
Name: day, dtype: object

In [17]:
# Select the temperature column by its label
df["temperature"] # attribute access (df.temperature) selects the same column

0    32
1    35
2    28
3    24
4    32
5    31
Name: temperature, dtype: int64

In [18]:
# Each column of a DataFrame is a pandas Series
type(df["day"])

pandas.core.series.Series

In [19]:
# Passing a list of labels selects several columns at once
df[["day","event"]]

Unnamed: 0,day,event
0,1/1/2017,Rain
1,1/2/2017,Sunny
2,1/3/2017,Snow
3,1/4/2017,Snow
4,1/5/2017,Rain
5,1/6/2017,Sunny


In [20]:
# Show the whole DataFrame again for reference
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [21]:
# Highest value in the temperature column
df["temperature"].max()

35

In [22]:
# Lowest value in the temperature column
df["temperature"].min()

24

In [23]:
# Arithmetic mean of the temperature column
df["temperature"].mean()

30.333333333333332

In [24]:
# Standard deviation of the temperature column (sample form, dividing by n - 1)
df["temperature"].std()

3.8297084310253524

In [25]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
df.describe()

Unnamed: 0,temperature,windspeed
count,6.0,6.0
mean,30.333333,4.666667
std,3.829708,2.33809
min,24.0,2.0
25%,28.75,2.5
50%,31.5,5.0
75%,32.0,6.75
max,35.0,7.0


In [26]:
# Filter rows with a boolean mask: keep those whose temperature is above 30
df.loc[df["temperature"] > 30] # same rows as df[df.temperature > 30]

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [27]:
# Only show day and temperature for the rows whose temperature is above 30.
# A single .loc[row_mask, column_list] call selects rows and columns in one
# step instead of chaining two [] lookups (column subset, then row filter).
df.loc[df["temperature"] > 30, ["day","temperature"]]

Unnamed: 0,day,temperature
0,1/1/2017,32
1,1/2/2017,35
4,1/5/2017,32
5,1/6/2017,31


In [28]:
# Row(s) in which temperature is at its maximum; the equality mask keeps every row that ties for the max
df[df["temperature"] == df["temperature"].max()]

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny


In [29]:
# Show the whole DataFrame again for reference
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [30]:
# Row labels of the DataFrame
df.index

RangeIndex(start=0, stop=6, step=1)

In [31]:
df.set_index("day", inplace = True) # with inplace = True the change is applied directly to the original df and no new DataFrame is returned

- Location
  
  `loc` is a label-based way to access data in a DataFrame. The two main forms are:
  
  df.loc[row]
  
  df.loc[row,column]

In [33]:
# df.loc[] selects rows by their index labels (names) rather than by numerical position
df.loc["1/5/2017"]

temperature      32
windspeed         4
event          Rain
Name: 1/5/2017, dtype: object

In [34]:
# Turn the current index back into a regular column and restore the default integer index
df.reset_index(inplace = True)

In [35]:
# Show the DataFrame after resetting the index
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [36]:
# Use the "event" column as the index, modifying df in place
df.set_index("event", inplace = True)

In [37]:
# Show the DataFrame now that "event" is the index
df

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rain,1/1/2017,32,6
Sunny,1/2/2017,35,7
Snow,1/3/2017,28,2
Snow,1/4/2017,24,7
Rain,1/5/2017,32,4
Sunny,1/6/2017,31,2


In [38]:
# When an index label occurs more than once, loc returns every row carrying that label
df.loc["Sunny"]

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Sunny,1/2/2017,35,7
Sunny,1/6/2017,31,2


In [39]:
# Load one worksheet from an Excel workbook, naming the sheet explicitly
df1 = pandas.read_excel("weather_data.xlsx", sheet_name="Sheet1")

In [40]:
# Show the DataFrame read from the Excel file
df1

Unnamed: 0,day,temperature,windspeed,event
0,2017-01-01,32,6,Rain
1,2017-01-02,35,7,Sunny
2,2017-01-03,28,2,Snow


In [41]:
# Build a DataFrame from a list of row tuples; tuples carry no field names,
# so the column labels are supplied separately
weather_data = [
    ("2017-01-01", 32, 6, "Rain"),
    ("2017-01-02", 35, 7, "Sunny"),
    ("2017-01-03", 28, 2, "Snow"),
]
df2 = pandas.DataFrame(
    data=weather_data,
    columns=["Day", "Temp", "Windspeed", "Event"],
)
df2

Unnamed: 0,Day,Temp,Windspeed,Event
0,2017-01-01,32,6,Rain
1,2017-01-02,35,7,Sunny
2,2017-01-03,28,2,Snow


- Useful arguments to read_csv

  nrows = 3 : read only the first 3 data rows of the file into the DataFrame

  na_values = ["not available","n.a."] : treat every value in the list as missing (NaN) while reading -> This is used for cleaning messy data

In [43]:
# Alternative ways to handle the header row:
#   stock_df = pandas.read_csv("stock_data.csv", header = None, names=["ticker","eps","revenue","price","people"])
#   or use: header = 1 || skiprows = 1
# Per-column missing-value markers: the text markers apply to every listed
# column, while -1 counts as missing only in "revenue"
text_markers = ["not available", "n.a."]
stock_df = pandas.read_csv(
    "stock_data.csv",
    na_values={
        "eps": text_markers,
        "revenue": text_markers + [-1],
        "price": text_markers,
        "people": text_markers,
    },
)
stock_df

Unnamed: 0,tickers,eps,revenue,price,people
0,GOOGL,27.82,87.0,845.0,larry page
1,WMT,4.61,484.0,65.0,
2,MSFT,-1.0,85.0,64.0,bill gates
3,RIL,,50.0,1023.0,mukesh ambani
4,TATA,5.6,,,ratan tata


In [81]:
# Other ways to write the frame out:
#   stock_df.to_csv("new.csv", index=False)  -> index=False avoids writing the index column
#   stock_df.to_csv("new.csv", columns=["tickers","people"], index=False)  -> columns picks which columns are written
# Here the header row is left out of the file
stock_df.to_csv(path_or_buf="new.csv", header=False)