In [2]:
import pandas as pd

## Reading from a CSV File

In [23]:
# Load the weather observations; the "date" column is parsed into datetimes
# at read time instead of being left as plain strings.
weather = pd.read_csv(
    "weather.csv",
    parse_dates=["date"],
)
weather

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,2012-01-01,0.0,12.8,5.0,4.7,drizzle
1,2012-01-02,10.9,10.6,2.8,4.5,rain
2,2012-01-03,0.8,11.7,7.2,2.3,rain
3,2012-01-04,20.3,12.2,5.6,4.7,rain
4,2012-01-05,1.3,8.9,2.8,6.1,rain
...,...,...,...,...,...,...
1456,2015-12-27,8.6,4.4,1.7,2.9,rain
1457,2015-12-28,1.5,5.0,1.7,1.3,rain
1458,2015-12-29,0.0,7.2,0.6,2.6,fog
1459,2015-12-30,0.0,5.6,-1.0,3.4,sun


Get column types:

In [24]:
# Print a summary of the frame: index range, per-column non-null counts,
# dtypes, and approximate memory usage.
weather.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1461 entries, 0 to 1460
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   date           1461 non-null   datetime64[ns]
 1   precipitation  1461 non-null   float64       
 2   temp_max       1461 non-null   float64       
 3   temp_min       1461 non-null   float64       
 4   wind           1461 non-null   float64       
 5   weather        1461 non-null   object        
dtypes: datetime64[ns](1), float64(4), object(1)
memory usage: 68.6+ KB


## Reading from a JSON File

In [37]:
# Load the match records from the JSON file into a DataFrame and display it.
football = pd.read_json("matches.json")
football

Unnamed: 0,date,team1,team2,score_team1,score_team2,group
0,2010-07-17,Kapfenberger SV,FC RB Salzburg,0,0,
1,2010-07-17,SC Wiener Neustadt,LASK Linz,5,0,
2,2010-07-17,SV Ried,SK Sturm Graz,0,3,
3,2010-07-18,FK Austria Wien,SV Mattersburg,2,0,
4,2010-07-18,FC Wacker Innsbruck,SK Rapid Wien,4,0,
...,...,...,...,...,...,...
34115,2018-05-18,Spezia,Parma,0,2,
34116,2018-05-18,Empoli,AC Perugia,2,1,
34117,2018-05-18,Venezia,Pescara,0,0,
34118,2018-05-18,Cittadella,Pro Vercelli,2,0,


## Reading from an SQL Database

In [26]:
from sqlite3 import connect
from lab import create_sample_sql_table

# ":memory:" gives a throwaway SQLite database that lives only in RAM.
connection = connect(":memory:")
# Project helper from `lab`; the query below relies on it having set up a
# "users" table on this connection.
create_sample_sql_table("users", connection)

# Run the query over the open connection and show the result as a DataFrame.
pd.read_sql(
    sql='SELECT uid, name FROM users',
    con=connection,
)

Unnamed: 0,uid,name
0,0,Alex
1,1,Guy
2,2,Ravi
3,3,Rafa


## Analyzing Weather Data

Start by looking at basic statistical descriptors of the dataset:

In [27]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the weather frame.
weather.describe()

Unnamed: 0,precipitation,temp_max,temp_min,wind
count,1461.0,1461.0,1461.0,1461.0
mean,3.029432,16.439083,8.234771,3.241136
std,6.680194,7.349758,5.023004,1.437825
min,0.0,-1.6,-7.1,0.4
25%,0.0,10.6,4.4,2.2
50%,0.0,15.6,8.3,3.0
75%,2.8,22.2,12.2,4.0
max,55.9,35.6,18.3,9.5


Find the day with the lowest minimum temperature:

In [28]:
# Smallest value found anywhere in the temp_min column.
absolute_temp_min = weather["temp_min"].min()
# Select every row whose temp_min equals that value (ties would all be shown).
weather.loc[weather["temp_min"] == absolute_temp_min]

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
706,2013-12-07,0.0,0.0,-7.1,3.1,sun


Find the day with the highest maximum temperature:

In [29]:
# Largest value found anywhere in the temp_max column.
absolute_temp_max = weather["temp_max"].max()
# Select every row whose temp_max equals that value (ties would all be shown).
weather.loc[weather["temp_max"] == absolute_temp_max]

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
953,2014-08-11,0.5,35.6,17.8,2.6,rain


Are there any null values in the dataset?

In [30]:
# Build a boolean mask of missing cells, flatten it to a NumPy array, and
# check whether at least one cell in the whole frame is missing.
weather.isna().to_numpy().any()

False

We can also count the null values in each column:

In [31]:
# Summing the boolean missing-value mask column by column gives the number of
# missing entries per column.
weather.isna().sum()

date             0
precipitation    0
temp_max         0
temp_min         0
wind             0
weather          0
dtype: int64

## Analyzing Football Matches Data