In [6]:
# Standard library: JSON parsing and access to environment variables
import json
import os

# requests: HTTP session handling and the exception types raised by failed requests
from requests import Request, Session
from requests.exceptions import ConnectionError, HTTPError, Timeout, TooManyRedirects

# URL for the CoinMarketCap API to get the latest cryptocurrency listings.
# This is the production endpoint; for testing, the sandbox equivalent is
# 'https://sandbox-api.coinmarketcap.com/v1/cryptocurrency/listings/latest'
url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest'

# Query parameters: start at the first listing, return 15 listings, quote prices in USD
parameters = {
  'start': '1',
  'limit': '15',
  'convert': 'USD'
}

# The API key is read from the CMC_PRO_API_KEY environment variable so that it is never
# stored in (or shared along with) the notebook.
api_key = os.environ.get('CMC_PRO_API_KEY')
if not api_key:
  raise RuntimeError('Set the CMC_PRO_API_KEY environment variable to your CoinMarketCap API key')

# Request headers: ask for a JSON response and authenticate with the API key
headers = {
  'Accept': 'application/json',
  'X-CMC_PRO_API_KEY': api_key,
}

# Create a session object so the headers are sent with every request made through it
session = Session()
session.headers.update(headers)

# Start from a known state: 'data' stays None if the request below fails, instead of being
# undefined or silently keeping the value from an earlier run of this cell.
data = None

try:
  # Send the GET request. The timeout (in seconds) keeps the cell from hanging forever on an
  # unresponsive server; without one, requests waits indefinitely.
  response = session.get(url, params=parameters, timeout=30)

  # Turn HTTP error statuses (bad key, rate limit, ...) into an HTTPError instead of parsing
  # the error payload as if it were listing data
  response.raise_for_status()

  # Parse the JSON response from the API and store it in the 'data' variable
  data = json.loads(response.text)

  # Uncomment the line below to print the retrieved data for inspection
  # print(data)

# Report network failures, HTTP error statuses and unparsable bodies (ValueError covers
# json.JSONDecodeError) without raising, so the notebook keeps running
except (ConnectionError, Timeout, TooManyRedirects, HTTPError, ValueError) as e:
  print(e)

# NOTE:
# If you encounter issues pulling data in Jupyter Notebook, try changing the data rate limit
# by entering "jupyter notebook --NotebookApp.iopub_data_rate_limit=1e10" in the Anaconda Prompt.

# If this doesn't work, try using the local host URL as shown in the tutorial video.

In [63]:
# Inspect the Python type of the parsed API response held in 'data'.
# json.loads returns a dict when the top level of the payload is a JSON object, which is what
# the later data['data'] lookup relies on.
type(data)

In [18]:
# Import the pandas library for data analysis and manipulation
import pandas as pd

# Lift pandas' display limits so a printed DataFrame is never truncated.
# A value of None means "no limit": every column is shown ...
pd.options.display.max_columns = None

# ... and every row is shown as well.
pd.options.display.max_rows = None

In [64]:
# Flatten the records stored under data['data'] into a tabular DataFrame (json_normalize turns
# nested keys into dotted column names such as 'quote.USD.price'), then add a 'timestamp'
# column holding the retrieval time, repeated on every row.
df = pd.json_normalize(data['data']).assign(timestamp=pd.to_datetime('now'))

# Display the DataFrame
df

In [60]:

# Define a function 'api_runner' to retrieve and process data from the CoinMarketCap API
def api_runner():
    """Fetch the latest CoinMarketCap listings and append them to the global 'df'.

    Requests the first 15 listings quoted in USD, flattens the records under the
    response's 'data' key with pd.json_normalize, stamps them with the retrieval time
    in a 'timestamp' column, and appends them to the module-level DataFrame 'df'
    (which must already exist when this function is called).

    The API key is read from the CMC_PRO_API_KEY environment variable so that it is
    not stored in the notebook.

    Connection errors, timeouts, redirect loops, HTTP error statuses and unparsable
    response bodies are printed and the call returns without touching 'df', so one
    failed poll does not abort a long-running collection loop.

    Returns:
        None. The result is stored by rebinding the global 'df'.

    Raises:
        RuntimeError: if the CMC_PRO_API_KEY environment variable is not set.
    """
    global df  # Rebound below so the collected rows persist across calls

    # API URL for the CoinMarketCap latest cryptocurrency listings
    url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest'
    # For testing, you can use the sandbox environment URL: 'https://sandbox-api.coinmarketcap.com/v1/cryptocurrency/listings/latest'

    # Parameters for the API request: the first 15 cryptocurrencies, prices converted to USD
    parameters = {
      'start': '1',
      'limit': '15',
      'convert': 'USD'
    }

    # A missing key is a configuration error, not a transient failure, so it raises
    # instead of being printed and skipped
    api_key = os.environ.get('CMC_PRO_API_KEY')
    if not api_key:
        raise RuntimeError('Set the CMC_PRO_API_KEY environment variable to your CoinMarketCap API key')

    # Headers: ask for a JSON response and authenticate with the API key
    headers = {
      'Accept': 'application/json',
      'X-CMC_PRO_API_KEY': api_key,
    }

    try:
        # The context manager closes the session (and its pooled connections) after each
        # call; this function is invoked repeatedly, so unclosed sessions would pile up
        with Session() as session:
            session.headers.update(headers)

            # The timeout (in seconds) keeps one unresponsive request from stalling the
            # whole collection loop
            response = session.get(url, params=parameters, timeout=30)

            # Turn HTTP error statuses into HTTPError instead of parsing an error payload
            response.raise_for_status()

            # Parse the response text (JSON format) into a Python dictionary
            data = json.loads(response.text)
            # Uncomment to print the data for debugging or inspection
            # print(data)

    # ValueError covers json.JSONDecodeError for bodies that are not valid JSON
    except (ConnectionError, Timeout, TooManyRedirects, HTTPError, ValueError) as e:
        print(e)
        # Nothing was retrieved; skip this round rather than failing on an unbound 'data'
        return

    # NOTE:
    # If you're using Jupyter Notebook and encounter issues pulling data, try changing the data rate limit
    # by running "jupyter notebook --NotebookApp.iopub_data_rate_limit=1e10" in the Anaconda Prompt.

    # Normalize the JSON data and flatten it into a tabular format (DataFrame).
    # The retrieval time goes into 'timestamp' (lower case) - the same column name the
    # initial 'df' uses - so appended rows line up with it instead of creating a second,
    # mostly empty column.
    df2 = pd.json_normalize(data['data'])
    df2['timestamp'] = pd.to_datetime('now')

    # Append the new rows to the global 'df'. pd.concat replaces DataFrame.append, which
    # was deprecated in pandas 1.4 and removed in pandas 2.0; like append's default, the
    # original row labels are kept.
    df = pd.concat([df, df2])

    # Uncomment the following lines if you want to save the data to a CSV file
    # Check if the CSV file exists; if not, create it and write the header
    # if not os.path.isfile(r'/Users/emmanuelalade/Documents/Data Analytics Project/Python/API.csv'):
    #     df.to_csv(r'/Users/emmanuelalade/Documents/Data Analytics Project/Python/API.csv', header='column_names')
    # else:
    #     # If the file exists, append the new data without writing the header again
    #     df.to_csv(r'/Users/emmanuelalade/Documents/Data Analytics Project/Python/API.csv', mode='a', header=False)

    # To read the CSV data back into a DataFrame, you can use the following line:
    # df = pd.read_csv(r'/Users/emmanuelalade/Documents/Data Analytics Project/Python/API.csv')

# If data retrieval doesn't work as expected, you can try using the local host URL as shown in the tutorial video.

In [67]:
# Import the os module for interacting with the operating system and the time module for timing operations
import os
from time import time  # Import time function to measure or track time (if needed)
from time import sleep  # Import sleep function to pause execution for a specified duration

# How many snapshots to collect and how long to wait between two API calls
N_RUNS = 333
PAUSE_SECONDS = 60

# Call 'api_runner' N_RUNS times, pausing between consecutive calls
for i in range(N_RUNS):
    api_runner()  # Retrieve the latest listings and append them to the global 'df'
    print('API Runner completed')  # Printed after each call returns

    # Pause before the next API call; there is nothing to wait for after the last one
    if i < N_RUNS - 1:
        sleep(PAUSE_SECONDS)

# No exit() here: in a Jupyter kernel it shuts the kernel down, which would discard 'df'
# and every other variable that the following cells rely on.

In [68]:
# Location of the CSV export of the collected API data
api_csv_path = r'/Users/emmanuelalade/Documents/Data Analytics Project/Python/API.csv'

# Load the CSV into its own DataFrame.
# NOTE(review): in the visible notebook the only code that writes this file is the
# commented-out to_csv block inside api_runner - confirm the file exists before running.
df72 = pd.read_csv(api_csv_path)

# Show what was loaded
df72

In [69]:
# Display the contents of the DataFrame 'df' for inspection.
# pandas' display.max_rows / display.max_columns were set to None earlier, so the frame is
# rendered in full rather than truncated.
df

In [22]:
# Render floating-point values with a fixed 5 decimal places when pandas displays them,
# instead of the scientific notation it may otherwise use for very large or small numbers.
# This only affects how values are shown, not the stored data.
pd.options.display.float_format = lambda value: '%.5f' % value

In [70]:
# Display 'df' again now that 'display.float_format' has been set in the previous cell:
# float columns are rendered with 5 fixed decimal places.
df

In [71]:
# Now let's look at the coin trends over time

# The percent-change columns to average, one per look-back window
# (1 hour, 24 hours, 7 days, 30 days, 60 days and 90 days)
percent_change_columns = [
    'quote.USD.percent_change_1h',
    'quote.USD.percent_change_24h',
    'quote.USD.percent_change_7d',
    'quote.USD.percent_change_30d',
    'quote.USD.percent_change_60d',
    'quote.USD.percent_change_90d',
]

# Group the rows by coin name; sort=False keeps the groups in order of first appearance
# instead of sorting them alphabetically
coin_groups = df.groupby('name', sort=False)

# Average each percent-change column within every coin's group
df3 = coin_groups[percent_change_columns].mean()

# Display the per-coin averages
df3

In [72]:
# Reshape 'df3' from wide to long format with stack().
# The six percent-change column labels move into an inner index level, so each coin gets one
# row per time period and the result is indexed by (coin name, original column label).
df4 = df3.stack()

# Display the stacked result 'df4'
df4

In [73]:
# Check the type of 'df4'.
# 'df3' has a single level of column labels, so stack() is expected to return a pandas Series
# here rather than a DataFrame (the next cell relies on that by calling to_frame()).
type(df4)

In [74]:
# Turn the stacked Series 'df4' into a one-column DataFrame: name the Series 'values' first,
# then let to_frame() use that name as the column label. The index is carried over unchanged.
df5 = df4.rename('values').to_frame()

# Display the resulting DataFrame 'df5'
df5

In [75]:
# Count the non-null entries in each column of 'df5' (here only the 'values' column).
# This is a quick check of how many (coin, time period) data points the frame holds.
df5.count()

In [76]:
# 'df5' is still indexed by the (coin name, percent-change column) pairs produced by stack().
# The following cells need both of those as ordinary columns: 'level_1' is renamed to
# 'percent_change' and 'name' is used as the plot hue.
#
# reset_index() moves the index levels into columns - the coin level keeps its name 'name',
# the unnamed inner level becomes 'level_1' - and gives the frame a fresh 0..n-1 index.
# Replacing the index with set_index(range(90)) instead would throw both levels away, and
# would also fail whenever the frame does not have exactly 90 rows.
df6 = df5.reset_index()

# The 0..n-1 row index, kept under its previous name 'index'; it now always matches the
# actual number of rows instead of a hard-coded 90
index = df6.index

# Display the DataFrame 'df6' with 'name', 'level_1' and 'values' as columns
df6

In [77]:
# Give the column that holds the time-period labels a descriptive name:
# 'level_1' (the default name of an unnamed index level turned into a column) becomes
# 'percent_change'.
# Note: rename() ignores labels that are not present, so this has no effect unless 'df6'
# actually has a 'level_1' column.
df7 = df6.rename({'level_1': 'percent_change'}, axis='columns')

# Display the DataFrame 'df7' with the renamed column
df7

In [78]:
# Replace the long source column names stored in 'percent_change' with short, readable
# period labels. Every period that was averaged earlier is covered - including the 1-hour
# one - so no category is left with its raw 'quote.USD.percent_change_...' name on the plot.
period_labels = {
    'quote.USD.percent_change_1h': '1h',
    'quote.USD.percent_change_24h': '24h',
    'quote.USD.percent_change_7d': '7d',
    'quote.USD.percent_change_30d': '30d',
    'quote.USD.percent_change_60d': '60d',
    'quote.USD.percent_change_90d': '90d',
}

# replace() with a dict maps each key to its value and leaves any other value untouched
df7['percent_change'] = df7['percent_change'].replace(period_labels)

# Display the updated DataFrame 'df7' with the shortened percent change labels
df7

In [47]:
# Import the seaborn library, which is built on top of matplotlib, for creating informative and attractive statistical graphics
import seaborn as sns

# Import matplotlib's pyplot module for creating visualizations such as plots and charts
import matplotlib.pyplot as plt

In [79]:
# Point plot of the average percent change per time period, one coloured series per coin:
#   x    - the time period label ('percent_change')
#   y    - the averaged percent change ('values')
#   hue  - the cryptocurrency name ('name')
#   kind - 'point' draws a point plot
sns.catplot(
    data=df7,
    x='percent_change',
    y='values',
    hue='name',
    kind='point',
)

In [80]:
# Now to do something much simpler
# Build a small DataFrame for the Bitcoin price over time: keep only the rows whose 'name' is
# 'Bitcoin' and only the three columns of interest - 'name', 'quote.USD.price' and 'timestamp'.
is_bitcoin = df['name'] == 'Bitcoin'
df10 = df.loc[is_bitcoin, ['name', 'quote.USD.price', 'timestamp']]

# Display the Bitcoin-only DataFrame 'df10'
df10

In [81]:
# Use seaborn's 'darkgrid' theme (shaded background with gridlines) for the plots that follow
sns.set_theme(style="darkgrid")

# Line plot of the Bitcoin price over time, drawn from the Bitcoin-only DataFrame 'df10':
#   x - 'timestamp' (when the price was retrieved)
#   y - 'quote.USD.price' (the price in USD)
sns.lineplot(
    data=df10,
    x='timestamp',
    y='quote.USD.price',
)