 # Instructor Do: Grouping

 ### Import Libraries and Dependencies

In [None]:
import pandas as pd
from pathlib import Path

%matplotlib inline

 ### Read in File and Clean Data

In [None]:
# Read the CSV, using the data_date column as a parsed datetime index.
# NOTE: infer_datetime_format is intentionally not passed; it is deprecated
# since pandas 2.0, where it has no effect and only triggers a warning.
csv_path = Path("crypto_data.csv")
crypto_data = pd.read_csv(csv_path, index_col="data_date", parse_dates=True)
crypto_data

# Drop the data_time and timestamp columns
# (intended to leave only cryptocurrency and data_price)
crypto_data = crypto_data.drop(columns=["data_time", "timestamp"])

# Sort the rows by the date index in ascending order
crypto_data = crypto_data.sort_index()

# Drop rows that contain missing values, then preview the cleaned data
crypto_data = crypto_data.dropna()
crypto_data.head()

 ### Group DataFrame and perform `count` aggregation

In [None]:
# Group by cryptocurrency and perform count
crypto_data_grp = crypto_data.groupby("cryptocurrency").count()
crypto_data_grp

 ### Group DataFrame without aggregate function

In [None]:
# Calling groupby on its own only defines the groups; chain a function or aggregation to it to get a result.

# Whenever a function is not chained to a groupby function, the output will be a DataFrameGroupBy object rather than an actual DataFrame.

# DataFrameGroupBy objects must be aggregated before they can be used.

In [None]:
# Group by cryptocurrency
crypto_data_grp = crypto_data.groupby("cryptocurrency")
crypto_data_grp

 ### Group DataFrame by `cryptocurrency` and calculate the average `data_price`

In [None]:
# Calculate average data_price for each crypto
crypto_data_mean = crypto_data.groupby("cryptocurrency").mean()
crypto_data_mean

 ### Group by more than one column and calculate count

In [None]:
# Group by more than one column
multi_group = crypto_data.groupby(["cryptocurrency", "data_price"])["data_price"].count()
multi_group

 ### Group by more than one column, round price to two decimal places, and calculate count

In [None]:
# Group by more than one column after rounding to two decimal places
rounded_crypto_data = crypto_data.round({"data_price": 2})
# View the docs for round()...the first param 'decimals' can be of type 'int' or 'dict' or 'Series'

multi_group = rounded_crypto_data.groupby(["cryptocurrency", "data_price"])["data_price"].count()
multi_group

 ### Plot grouped data to generate more than one line on the same chart

In [None]:
# Plot data_price for each crypto across time
grouped_cryptos = crypto_data.groupby("cryptocurrency")["data_price"].plot(legend=True)
grouped_cryptos

In [None]:
# Check out the "Group by" user guide for more info - https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html