# Storing and Analyzing Data on Azure Blob Storage with Python

### Import Required Modules
###### Import the necessary libraries, packages and modules, such as Azure Storage Blob, pandas, dotenv and csv.
##### Azure storage blob: 
###### This module provides the classes and functions necessary to interact with Azure Blob Storage services, such as uploading, downloading, listing, and managing blobs within containers. The BlobServiceClient class from azure.storage.blob allows you to connect to an Azure storage account and access the blob storage service programmatically.
##### pandas
###### The pandas library is used extensively in Python for data manipulation, analysis, and exploration.

In [4]:
from azure.storage.blob import BlobServiceClient, BlobClient
from io import BytesIO  
import pandas as pd
import os
from dotenv import load_dotenv
import csv

### Loading Azure Credentials
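###### This step reads the Azure credentials (storage account name, account key, container name and blob name) from the `azure_credentials.env` file; they are needed to download the sample data.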

In [5]:
# Load the storage settings from the local .env file into the process
# environment, then read each one back.
load_dotenv('azure_credentials.env')
account_name = os.getenv('AZURE_STORAGE_ACCOUNT')
account_key = os.getenv('AZURE_STORAGE_KEY')
container_name = os.getenv('BLOB_CONTAINER_NAME')
blob_name = os.getenv('BLOB_NAME')

# Fail fast with a clear message when a setting is absent or empty.
# os.getenv returns None for an unset variable, which would otherwise surface
# later as a confusing connection error against "https://None.blob.core.windows.net".
_missing_settings = [
    env_name
    for env_name, env_value in (
        ('AZURE_STORAGE_ACCOUNT', account_name),
        ('AZURE_STORAGE_KEY', account_key),
        ('BLOB_CONTAINER_NAME', container_name),
        ('BLOB_NAME', blob_name),
    )
    if not env_value
]
if _missing_settings:
    raise RuntimeError(
        "Missing Azure settings: " + ", ".join(_missing_settings)
        + ". Check that 'azure_credentials.env' exists and defines them."
    )

### Create a BlobServiceClient Object
###### Creating a BlobServiceClient object is a fundamental step for interacting with Azure Blob Storage services programmatically.

In [6]:
# Blob endpoint of the storage account, derived from the account name.
account_url = f"https://{account_name}.blob.core.windows.net"

# Account-level client, authenticated with the account key.
blob_service_client = BlobServiceClient(account_url=account_url, credential=account_key)

# Narrow the account-level client down to the container, and then to the
# single blob that the following cells download.
container_client = blob_service_client.get_container_client(container_name)
blob_client = container_client.get_blob_client(blob_name)
    


### Downloading and Reading dataset from Azure Blob
###### Download the .csv blob to the local computer and save it at the specified path.

In [11]:
local_csv_file_path = r'C:\Users\hp\Desktop\Python lab\downloaded_blob.csv'

# Fetch the blob's raw bytes first, so that a failed download never truncates
# an existing local copy of the file.
blob_bytes = blob_client.download_blob().readall()

# Save the bytes unchanged. The blob is already a CSV file; passing each line
# through csv.writer as a single field would quote every line that contains a
# comma and collapse the saved file into one column.
# NOTE(review): a PermissionError on open() presumably means the target file is
# open in another program or the folder is not writable - confirm and re-run.
with open(local_csv_file_path, "wb") as csv_file:
    csv_file.write(blob_bytes)

print(f"Blob '{blob_name}' downloaded and saved as '{local_csv_file_path}' successfully.")

PermissionError: [Errno 13] Permission denied: 'C:\\Users\\hp\\Desktop\\Python lab\\downloaded_blob.csv'

### Perform Basic Data Analysis 
##### Display the first few rows of the dataset

In [8]:
# Load the CSV into a DataFrame.
# NOTE(review): this is a relative path, while the download cell writes to an
# absolute Desktop path - confirm both refer to the same file.
csv_path = 'downloaded_blob.csv'
df = pd.read_csv(csv_path)

# Preview the first five rows.
df.head(n=5)

Unnamed: 0,code,iso,country,WWT.ind.1995,WWT.ind.1996,WWT.ind.1997,WWT.ind.1998,WWT.ind.1999,WWT.ind.2000,WWT.ind.2001,...,WWT.ind.2013,WWT.ind.2014,WWT.ind.2015,WWT.ind.2016,WWT.ind.2017,WWT.ind.2018,WWT.ind.2019,WWT.ind.2020,WWT.ind.2021,WWT.ind.2022
0,4,AFG,Afghanistan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,8,ALB,Albania,1.853,1.853,1.853,1.853,1.853,1.853,1.853,...,1.853,1.853,1.853,1.853,1.853,1.853,1.853,1.853,1.853,1.853
2,12,DZA,Algeria,33.12,33.12,33.12,33.12,33.12,33.12,33.12,...,33.12,33.12,33.12,33.12,33.12,33.12,33.12,33.12,33.12,33.12
3,20,AND,Andorra,100.0,100.0,100.0,100.0,100.0,100.0,100.0,...,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
4,24,AGO,Angola,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


##### Display the last few rows of the dataset

In [9]:
# Preview the last five rows.
df.tail(n=5)

Unnamed: 0,code,iso,country,WWT.ind.1995,WWT.ind.1996,WWT.ind.1997,WWT.ind.1998,WWT.ind.1999,WWT.ind.2000,WWT.ind.2001,...,WWT.ind.2013,WWT.ind.2014,WWT.ind.2015,WWT.ind.2016,WWT.ind.2017,WWT.ind.2018,WWT.ind.2019,WWT.ind.2020,WWT.ind.2021,WWT.ind.2022
215,876,WLF,Wallis and Futuna Islands,,,,,,,,...,,,,,,,,,,
216,732,ESH,Western Sahara,,,,,,,,...,,,,,,,,,,
217,887,YEM,Yemen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
218,894,ZMB,Zambia,4.5,4.5,4.5,4.5,4.5,4.5,4.5,...,4.5,4.5,4.5,4.5,4.5,4.5,4.5,4.5,4.5,4.5
219,716,ZWE,Zimbabwe,37.191,37.191,37.191,37.191,37.191,37.191,37.191,...,37.191,37.191,37.191,37.191,37.191,37.191,37.191,37.191,37.191,37.191


##### Summarize the dataset information, including details about the columns, data types and memory usage

In [33]:
# Print a summary of the DataFrame: index range, column names, non-null counts
# and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 220 entries, 0 to 219
Data columns (total 31 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   code          220 non-null    int64  
 1   iso           220 non-null    object 
 2   country       220 non-null    object 
 3   WWT.ind.1995  193 non-null    float64
 4   WWT.ind.1996  193 non-null    float64
 5   WWT.ind.1997  193 non-null    float64
 6   WWT.ind.1998  193 non-null    float64
 7   WWT.ind.1999  193 non-null    float64
 8   WWT.ind.2000  193 non-null    float64
 9   WWT.ind.2001  193 non-null    float64
 10  WWT.ind.2002  193 non-null    float64
 11  WWT.ind.2003  193 non-null    float64
 12  WWT.ind.2004  193 non-null    float64
 13  WWT.ind.2005  193 non-null    float64
 14  WWT.ind.2006  193 non-null    float64
 15  WWT.ind.2007  193 non-null    float64
 16  WWT.ind.2008  193 non-null    float64
 17  WWT.ind.2009  193 non-null    float64
 18  WWT.ind.2010  193 non-null    

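##### Summary statistics for the numeric columns, including count, mean, standard deviation, minimum, quartiles (25%, 50%, 75%) and maximum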

In [34]:
# Compute descriptive statistics (count, mean, std, min, quartiles, max) for
# the numeric columns.
df.describe()

Unnamed: 0,code,WWT.ind.1995,WWT.ind.1996,WWT.ind.1997,WWT.ind.1998,WWT.ind.1999,WWT.ind.2000,WWT.ind.2001,WWT.ind.2002,WWT.ind.2003,...,WWT.ind.2013,WWT.ind.2014,WWT.ind.2015,WWT.ind.2016,WWT.ind.2017,WWT.ind.2018,WWT.ind.2019,WWT.ind.2020,WWT.ind.2021,WWT.ind.2022
count,220.0,193.0,193.0,193.0,193.0,193.0,193.0,193.0,193.0,193.0,...,193.0,193.0,193.0,193.0,193.0,193.0,193.0,193.0,193.0,193.0
mean,437.713636,24.821524,24.821524,24.821524,24.821524,24.821524,24.821524,24.821524,24.821524,24.821524,...,24.821524,24.821524,24.821524,24.821524,24.821524,24.821524,24.821524,24.821524,24.821524,24.821524
std,254.574145,32.772465,32.772465,32.772465,32.772465,32.772465,32.772465,32.772465,32.772465,32.772465,...,32.772465,32.772465,32.772465,32.772465,32.772465,32.772465,32.772465,32.772465,32.772465,32.772465
min,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,217.0,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,...,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
50%,436.0,7.1744,7.1744,7.1744,7.1744,7.1744,7.1744,7.1744,7.1744,7.1744,...,7.1744,7.1744,7.1744,7.1744,7.1744,7.1744,7.1744,7.1744,7.1744,7.1744
75%,660.5,43.04,43.04,43.04,43.04,43.04,43.04,43.04,43.04,43.04,...,43.04,43.04,43.04,43.04,43.04,43.04,43.04,43.04,43.04,43.04
max,894.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,...,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
