In [None]:
import requests
import os
import time
import pandas as pd

# URL of the dataset
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-ML0101EN-SkillsNetwork/labs/Module%203/data/yellow_tripdata_2019-06.csv"
file_name = 'yellow_tripdata_2019-06.csv'

# Function to download the file with retries and show download progress
def download_file(url, file_name, retries=5, timeout=60, chunk_size=1024):
    """Download ``url`` to ``file_name``, retrying on network errors.

    The response body is streamed to disk chunk by chunk. When the server
    sends a ``Content-Length`` header, a running progress line is printed.

    Args:
        url: Address of the file to fetch.
        file_name: Local path the body is written to. The file is opened in
            ``'wb'`` mode, so an existing file is overwritten.
        retries: Maximum number of attempts. Only timeouts and other
            ``requests`` exceptions lead to another attempt.
        timeout: Timeout value forwarded to ``requests.get``.
        chunk_size: Number of bytes requested per streamed chunk.

    Returns:
        ``True`` once the file has been written. ``False`` if the server
        answers with a non-200 status (this is not retried) or if every
        attempt fails.
    """
    retry_delay = 5  # seconds to wait between two attempts

    for attempt in range(1, retries + 1):
        try:
            # Using the response as a context manager guarantees the streamed
            # connection is released on every exit path: early return,
            # exception in the middle of the download, or success.
            with requests.get(url, stream=True, timeout=timeout) as response:
                if response.status_code != 200:
                    print(f"Failed to download file. HTTP status code: {response.status_code}")
                    return False

                print("Download started...")

                # A value of 0 means the size is unknown, which disables the
                # progress output below.
                total_size = int(response.headers.get('Content-Length', 0))
                downloaded = 0

                with open(file_name, 'wb') as file:
                    for chunk in response.iter_content(chunk_size=chunk_size):
                        file.write(chunk)
                        downloaded += len(chunk)

                        if total_size > 0:
                            percent = (downloaded / total_size) * 100
                            # "\r" rewrites the same console line on each update.
                            print(f"\rDownloaded {downloaded}/{total_size} bytes ({percent:.2f}%)", end="")

            print(f"\nFile downloaded successfully: {file_name}")
            return True
        except requests.exceptions.Timeout:
            print(f"Timeout occurred, retrying... ({attempt}/{retries})")
        except requests.exceptions.RequestException as e:
            print(f"An error occurred: {e}, retrying... ({attempt}/{retries})")

        # Pause only when another attempt will follow; sleeping after the last
        # failure would just delay the final error report.
        if attempt < retries:
            time.sleep(retry_delay)

    print(f"Failed to download file after {retries} attempts.")
    return False

# Call the function to download the file
if not download_file(url, file_name):
    print("The file was not downloaded.")
else:
    # List the working directory so the saved file can be confirmed by eye
    print("Files in current directory:", os.listdir())

    # The download succeeded, so try to load the CSV with pandas
    try:
        raw_data = pd.read_csv(file_name)

        # Report the dataset dimensions: rows are observations, columns are variables
        print(f"There are {raw_data.shape[0]} observations in the dataset.")
        print(f"There are {raw_data.shape[1]} variables in the dataset.")

        # Preview the first rows of the dataset
        print(raw_data.head())

    except FileNotFoundError:
        print("File not found. Make sure the file is downloaded correctly.")
    except Exception as err:
        print(f"Error loading the CSV file: {err}")
