## **My first touch with Data Engineering.**

In [1]:
!pip install requests



In [7]:
import requests # This gets the "request" tool from our toolbox.

url = "https://api.coingecko.com/api/v3/simple/price?ids=bitcoin&vs_currencies=usd"
response = requests.get(url) # This sends the waiter to the URL.

print("The status of our request was:", response.status_code)
print("The raw text we got back is:")
print(response.text)

The status of our request was: 200
The raw text we got back is:
{"bitcoin":{"usd":111575}}


In [8]:
data = response.json() # This converts the text into a Python dictionary.

print("Now the data is a Python object:")
print(data)
print("Type of 'data' variable is:", type(data))

print("\nLet's navigate it:")
print("What is the value of 'bitcoin'?", data['bitcoin'])
print("What is the USD price?", data['bitcoin']['usd'])

Now the data is a Python object:
{'bitcoin': {'usd': 111575}}
Type of 'data' variable is: <class 'dict'>

Let's navigate it:
What is the value of 'bitcoin'? {'usd': 111575}
What is the USD price? 111575


In [10]:
import requests
import pandas as pd
from datetime import datetime # This gets the current time tool.

# 1. ACQUIRE THE DATA (This is what you just mastered)
url = "https://api.coingecko.com/api/v3/simple/price?ids=bitcoin&vs_currencies=usd"
response = requests.get(url)
data = response.json()

# 2. EXTRACT AND ADD CONTEXT
# Let's get the price the way you already know how.
price = data['bitcoin']['usd']

# Now, let's create a dictionary. Think of it as a single row in a spreadsheet.
record = {
    'cryptocurrency': 'bitcoin', # Column Name: Value
    'price_usd': price,          # Column Name: Value
    'timestamp': datetime.now()   # Column Name: Value (the current time!)
}

# Let's see what our record looks like.
print("Single Data Record:")
print(record)





# 3. STRUCTURE AND SAVE THE DATA
# Pandas DataFrames are like digital spreadsheets. We need to put our record into one.
# We first make a list containing our one record. (Later, we'll have many records in the list)
data_list = [record]

# We create a DataFrame from that list.
df = pd.DataFrame(data_list)

# Let's see our mini-spreadsheet.
print("\nDataFrame (Our Spreadsheet):")
print(df)
print("\nColumn types:")
print(df.dtypes)

# 4. PERSIST TO STORAGE
# Create a filename with the current date so it's unique and organized.
filename = "bitcoin_price.csv"
# Save the DataFrame to a CSV file. index=False means we don't need an extra number column.
df.to_csv(filename, index=False)

print(f"\nSuccess! Data saved to {filename}")

Single Data Record:
{'cryptocurrency': 'bitcoin', 'price_usd': 111393, 'timestamp': datetime.datetime(2025, 8, 25, 13, 11, 54, 996749)}

DataFrame (Our Spreadsheet):
  cryptocurrency  price_usd                  timestamp
0        bitcoin     111393 2025-08-25 13:11:54.996749

Column types:
cryptocurrency            object
price_usd                  int64
timestamp         datetime64[ns]
dtype: object

Success! Data saved to bitcoin_price.csv


In [11]:
import requests
import pandas as pd
from datetime import datetime

# 1. ACQUIRE THE DATA
# Notice we added 'ethereum' to the list of ids
url = "https://api.coingecko.com/api/v3/simple/price?ids=bitcoin,ethereum&vs_currencies=usd"
response = requests.get(url)
data = response.json()

# Let's see the new, more complex structure we get back
print("Raw API Response:")
print(data)
print("\n")

# 2. EXTRACT AND ADD CONTEXT USING A LOOP
data_list = [] # We start with an empty list to hold our records

# The .items() method lets us loop through a dictionary's keys and values.
for coin_name, price_info in data.items():
    print(f"Processing: {coin_name}")
    print(f"The data for this coin is: {price_info}")

    # For each coin, create a new record (a new row)
    record = {
        'cryptocurrency': coin_name, # The key (e.g., 'bitcoin') becomes the value
        'price_usd': price_info['usd'], # Navigate into the inner dictionary
        'timestamp': datetime.now()
    }
    data_list.append(record) # Add this record to our list
    print(f"Record created: {record}\n")

# 3. STRUCTURE AND SAVE THE DATA
df = pd.DataFrame(data_list) # Create a DataFrame from the list of records
print("Final DataFrame:")
print(df)

# 4. PERSIST TO STORAGE
filename = "multi_crypto_prices.csv"
df.to_csv(filename, index=False)
print(f"\nSuccess! Data saved to {filename}")

Raw API Response:
{'bitcoin': {'usd': 111381}, 'ethereum': {'usd': 4630.53}}


Processing: bitcoin
The data for this coin is: {'usd': 111381}
Record created: {'cryptocurrency': 'bitcoin', 'price_usd': 111381, 'timestamp': datetime.datetime(2025, 8, 25, 13, 19, 4, 575019)}

Processing: ethereum
The data for this coin is: {'usd': 4630.53}
Record created: {'cryptocurrency': 'ethereum', 'price_usd': 4630.53, 'timestamp': datetime.datetime(2025, 8, 25, 13, 19, 4, 575052)}

Final DataFrame:
  cryptocurrency  price_usd                  timestamp
0        bitcoin  111381.00 2025-08-25 13:19:04.575019
1       ethereum    4630.53 2025-08-25 13:19:04.575052

Success! Data saved to multi_crypto_prices.csv


In [13]:
import requests
import pandas as pd
from datetime import datetime

#url = "https://api.coingecko.com/api/v3/simple/price?ids=bitcoin,ethereum&vs_currencies=usd"
# Trying out broken url for better understanding.
url = "https://api.coingecko.com/api/v3/THIS_DOES_NOT_EXIST"

# Initialize an empty list to store our data. We do this OUTSIDE the try block.
data_list = []

try:
    # 1. ATTEMPT TO ACQUIRE THE DATA
    print("Sending request to API...")
    response = requests.get(url)

    # 2. CHECK FOR HTTP ERRORS (e.g., 404 Not Found, 500 Server Error)
    # This will raise an exception if the request failed.
    response.raise_for_status()
    print("API request successful!")

    # 3. ATTEMPT TO PARSE THE JSON RESPONSE
    data = response.json()
    print("JSON data parsed.")

    # 4. VALIDATE AND PROCESS THE DATA
    for coin_name, price_info in data.items():

        # Check if the 'usd' key exists inside this coin's data.
        if 'usd' in price_info:
            record = {
                'cryptocurrency': coin_name,
                'price_usd': price_info['usd'],
                'timestamp': datetime.now()
            }
            data_list.append(record)
            print(f"✓ Successfully processed data for {coin_name}.")
        else:
            # This is not an error that stops the script. It's a warning.
            print(f"⚠ Warning: Expected 'usd' key not found for {coin_name}. Skipping.")

    # 5. CHECK IF WE ACTUALLY GOT ANY DATA
    if not data_list: # This checks if the list is empty
        raise Exception("No valid data was processed from the API response.")

    # 6. SAVE THE DATA
    df = pd.DataFrame(data_list)
    filename = "robust_crypto_prices.csv"
    df.to_csv(filename, index=False)
    print(f"\n✅ Success! Data from {len(data_list)} coins saved to {filename}")

# THIS BLOCK ONLY RUNS IF AN ERROR OCCURRED IN THE TRY BLOCK
except requests.exceptions.RequestException as e:
    print(f"❌ A network error occurred: {e}")
except ValueError as e:
    print(f"❌ Failed to decode JSON response: {e}")
except Exception as e: # Catches any other unexpected errors
    print(f"❌ An unexpected error occurred: {e}")

Sending request to API...
❌ A network error occurred: 404 Client Error: Not Found for url: https://api.coingecko.com/api/v3/THIS_DOES_NOT_EXIST


In [None]:
# import os

# # List all files in the current directory
# print("Files in current directory:")
# all_files = os.listdir()
# for file in all_files:
#     print(file)

In [None]:
# # SAFETY FIRST: Let's list only CSV files first to confirm what we're about to delete.
# print("CSV files found:")
# csv_files = [file for file in all_files if file.endswith('.csv')]
# for file in csv_files:
#     print(file)

In [None]:
# # Delete all CSV files
# for file in csv_files:
#     try:
#         os.remove(file)
#         print(f"Deleted: {file}")
#     except Exception as e:
#         print(f"Could not delete {file}: {e}")

# # Verify they are gone
# print("\nRemaining files after deletion:")
# print(os.listdir())