A quick revision of the pandas library
- installing            --> OK   (pip install pandas)
- importing             --> OK   (import pandas as pd)   
- Pandas Series         --> 1-dimensional data (a labelled sequence)
- Pandas DataFrames     --> 2-dimensional data (a table; columns as variables)
- Read CSV files        --> pd.read_csv('data.csv')
- Read JSON files       --> pd.read_json('data.json')

In [None]:
# Build a one-dimensional pandas Series out of a plain Python list.
import pandas as pd

a = [1, 7, 2, 9]
myvar = pd.Series(data=a)

# Print the whole Series first, then the single value stored under label 0
# (no index was given, so the labels are the default 0, 1, 2, ...).
print(myvar, myvar[0], sep="\n")

In [None]:
# Build a Series from a list and attach custom labels through the ``index``
# argument instead of relying on the default 0, 1, 2, ... labels.
import pandas as pd

a = [1, 7, 2]
myvar = pd.Series(
    data=a,
    index=["x", "y", "z"],
)
print(myvar)
print("\n ==========================================\n ")

# Once labels exist, a value can be looked up by its label.
print(myvar["y"])

In [None]:
# Build a Series from a dictionary: each key becomes the label of its value.
import pandas as pd

calories = dict(day1=420, day2=380, day3=390)
myvar = pd.Series(data=calories)
print(myvar)
print("\n ==========================================\n ")

# Passing ``index`` together with a dictionary keeps only the listed keys,
# so this second Series holds just "day1" and "day2".
myvar2 = pd.Series(data=calories, index=["day1", "day2"])
print(myvar2)
print("\n ==========================================\n ")


In [None]:
# A DataFrame is pandas' two-dimensional table; each of its columns behaves
# like a Series.
# Here the table is built from a dictionary that maps every column name to a
# list of values.
import pandas as pd

data = dict(
    calories=[420, 380, 390],
    duration=[50, 40, 45],
)
myvar = pd.DataFrame(data=data)
print(myvar)

In [None]:
import pandas as pd

# Column name -> list of row values for that column.
data = {"calories": [420, 380, 390], "duration": [50, 40, 45]}

# Wrap the dictionary in a DataFrame and display the resulting table.
df = pd.DataFrame(data=data)
print(df)

In [None]:
import pandas as pd

data = dict(calories=[420, 380, 390], duration=[50, 40, 45])
df = pd.DataFrame(data)
print(df)
print("\n ==========================================\n ")

# ``loc`` selects rows by index label. Asking for the single label 0 hands
# that row back as a pandas Series keyed by column name.
print(df.loc[0])



In [None]:
# Named indexes
# The ``index`` argument replaces the default 0, 1, 2, ... row labels with
# names of your own choosing, one per row.
import pandas as pd

data = dict(
    calories=[420.9, 380.7, 390.4],
    duration=[50, 40, 45],
)
df = pd.DataFrame(data, index=["day1", "day2", "day3"])
print(df)
print("\n ==========================================\n ")

# With named rows, ``loc`` is addressed by the row's name.
print(df.loc["day3"])

In [None]:
# Read a comma-separated values (CSV) file into a DataFrame and display it.
import pandas as pd

df = pd.read_csv("data.csv")
print(df)


In [None]:
# max_rows
# pandas caps how many rows are shown when a DataFrame is printed; the cap is
# stored in the ``display.max_rows`` option.
import pandas as pd

# Show the limit currently in effect ...
print(pd.get_option("display.max_rows"))
# ... then set it to 9999 before loading and printing the data.
pd.set_option("display.max_rows", 9999)
df = pd.read_csv("data.csv")
print(df)

In [None]:
import pandas as pd

df = pd.read_csv("data.csv")

# NOTE(review): the DataFrame is printed twice with nothing changed in
# between, so both printouts are identical -- presumably one of them was meant
# to demonstrate a different display call; confirm with the author.
print(df)
print("\n ==========================================\n")
print(df)

# Report the row-display limit that was in effect for the printouts above.
print(pd.options.display.max_rows)

In [None]:
# Read a JSON file into a DataFrame and display it.
import pandas as pd

df = pd.read_json("data.json")
print(df)

In [None]:
# JSON and Python dictionaries
# A JSON object is written much like a Python dictionary literal, so data that
# is already held in a dictionary (rather than in a .json file) can be handed
# to DataFrame directly.
#
# Layout of the nested dictionary below: column name -> {row label -> value}.
import pandas as pd

data = {
    "Duration": {"0": 60, "1": 60, "2": 60, "3": 45, "4": 45, "5": 60},
    "Pulse": {"0": 110, "1": 117, "2": 103, "3": 109, "4": 117, "5": 102},
    "Maxpulse": {"0": 130, "1": 145, "2": 135, "3": 175, "4": 148, "5": 127},
    "Calories": {"0": 409, "1": 479, "2": 340, "3": 282, "4": 406, "5": 300},
}
df = pd.DataFrame(data=data)
print(df)

In [None]:
# Pandas - Analyzing DataFrames
# Viewing the data
# head() is one of the most used methods for a quick look at a DataFrame: it
# returns the column headers plus the requested number of rows, counted from
# the top.
import pandas as pd

# Quick overview: the first 10 rows.
df = pd.read_csv("data.csv")
print(df.head(n=10))

# Without an explicit row count, head() falls back to the top 5 rows.
print("\n ==========================================\n")
df2 = pd.read_csv("data.csv")
print(df2.head())

In [None]:
# tail() is the counterpart of head(): it returns the column headers plus a
# number of rows counted from the bottom of the DataFrame.
import pandas as pd

df = pd.read_csv("data.csv")
print(df.tail())


In [None]:
# Info About the Data
# The DataFrame object has a method called info() that prints a summary of
# the data set.

import pandas as pd

df = pd.read_csv('data.csv')
# info() writes the summary to standard output itself and returns None, so it
# is called on its own: wrapping it in print() would add a stray "None" line
# after the summary.
df.info()


In [None]:
# Visualize the DataFrame: pandas draws the plot and Matplotlib's pyplot
# displays it.
import matplotlib.pyplot as plt
import pandas as pd

df = pd.read_csv("data.csv")

# Draw the DataFrame using the default plot settings, then show the figure.
df.plot()
plt.show()

In [None]:
# Scatter plot of "Calories" (y axis) against "Duration" (x axis).
# Relies on ``df`` and ``plt`` created in an earlier cell.
df.plot.scatter(x="Duration", y="Calories")
plt.show()

In [None]:
# Scatter plot of "Maxpulse" (y axis) against "Duration" (x axis).
# Relies on ``df`` and ``plt`` created in an earlier cell.
df.plot.scatter(x="Duration", y="Maxpulse")
plt.show()

In [None]:
# Histogram of the "Duration" column.
# Relies on ``df`` and ``plt`` created in an earlier cell.
df["Duration"].plot(kind = 'hist')
# Show the figure explicitly, as the other plotting cells do; without this
# call nothing is displayed when the code runs outside an inline backend.
plt.show()
