In [2]:
"""
This script retrieves the annual average UV Index for population groups based on their approximate geographic coordinates.
It uses the NASA POWER API to query climatology data and stores the results in a pandas DataFrame for further analysis.
The output can be used to study the relationship between UV radiation exposure and allele frequencies in different populations.

There are no special instructions for this portion of the file.
"""

# Import necessary packages 
import time
import warnings

import pandas as pd
import requests

# Sampling point for each population group, listed as
# (population label, latitude, longitude). The insertion order below is the
# row order of the resulting UV table.
_population_sites = [
    ('European', 51.1657, 10.4515),
    ('African Others', 9.0820, 8.6753),
    ('East Asian', 35.8617, 104.1954),
    ('African American', 37.0902, -95.7129),
    ('Latin American 1', 23.6345, -102.5528),
    ('Latin American 2', -14.2350, -51.9253),
    ('Other Asian', 12.8797, 121.7740),
    ('South Asian', 20.5937, 78.9629),
    ('African', -1.2921, 36.8219),
    ('Asian', -0.7893, 113.9213),
    # Not geographically meaningful. NOTE(review): these are real numbers, not
    # None, so a lookup keyed on "is None" will still query this point.
    ('Total', 0.0, 0.0),
    # No location at all: both coordinates are None.
    ('Other', None, None),
]

# Mapping of population label -> (latitude, longitude) tuple.
populations = {
    label: (latitude, longitude)
    for label, latitude, longitude in _population_sites
}

# Function to fetch annual average UV Index from NASA POWER
def get_uv_index(lat, lon, timeout=30):
    """Return the annual mean all-sky surface UV index at a point, or None.

    Queries the NASA POWER climatology endpoint for ``ALLSKY_SFC_UV_INDEX``
    and averages the monthly values found in the response.

    Parameters
    ----------
    lat, lon : float or None
        Values sent as the ``latitude`` / ``longitude`` query parameters.
        If either is None, no request is made.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30), so a
        stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    float or None
        Mean of the usable monthly values rounded to two decimals. None when
        a coordinate is missing, the request fails, the response does not
        have the expected structure, or no usable monthly value is present.
        Failures are reported with ``warnings.warn`` rather than raised.
    """
    if lat is None or lon is None:
        return None
    url = f"https://power.larc.nasa.gov/api/temporal/climatology/point?parameters=ALLSKY_SFC_UV_INDEX&community=RE&longitude={lon}&latitude={lat}&format=JSON"
    try:
        response = requests.get(url, timeout=timeout)
        # Turn HTTP 4xx/5xx into an exception instead of parsing an error body.
        response.raise_for_status()
        data = response.json()
        monthly_data = data['properties']['parameter']['ALLSKY_SFC_UV_INDEX']
        # Per the POWER API docs, climatology responses carry an 'ANN' annual
        # summary next to the monthly entries, and missing data is reported
        # with the fill value -999. Neither belongs in a mean of months.
        monthly_values = [
            value
            for period, value in monthly_data.items()
            if period != 'ANN' and value is not None and value > -999
        ]
    except (requests.RequestException, ValueError, KeyError, TypeError, AttributeError) as exc:
        # Best-effort lookup: keep returning None, but say why so that a
        # missing value in the table can be traced back to its cause.
        warnings.warn(f"UV index lookup failed for ({lat}, {lon}): {exc!r}", stacklevel=2)
        return None
    if not monthly_values:
        return None
    annual_avg = sum(monthly_values) / len(monthly_values)
    return round(annual_avg, 2)

# Collect one record per population: its label, its coordinates, and the
# annual UV index looked up for those coordinates.
uv_data = []
for pop, coordinates in populations.items():
    lat, lon = coordinates
    uv_index = get_uv_index(lat, lon)
    record = {
        'Population_Name': pop,
        'Latitude': lat,
        'Longitude': lon,
        'Annual_UV_Index': uv_index,
    }
    uv_data.append(record)
    # Pause between iterations to avoid rate limiting.
    time.sleep(1)

# Build the table once from the full list of records.
uv_df = pd.DataFrame(uv_data)

# Last expression in the cell, so the notebook renders the table.
uv_df


Unnamed: 0,Population_Name,Latitude,Longitude,Annual_UV_Index
0,European,51.1657,10.4515,0.58
1,African Others,9.082,8.6753,1.9
2,East Asian,35.8617,104.1954,1.25
3,African American,37.0902,-95.7129,1.18
4,Latin American 1,23.6345,-102.5528,2.19
5,Latin American 2,-14.235,-51.9253,2.18
6,Other Asian,12.8797,121.774,2.11
7,South Asian,20.5937,78.9629,1.74
8,African,-1.2921,36.8219,2.38
9,Asian,-0.7893,113.9213,2.01


In [8]:
"""
This section merges cleaned_ncbi_alfa.csv (a dataset previously cleaned in R)
with the newly created uv_df DataFrame on Population_Name. The merged DataFrame is then saved as a new CSV file.

To use this script, ensure that all file paths match your working directory.
"""
# Input and output locations. Edit these two lines to match your own machine.
allele_csv_path = "C:/Users/maeso/OneDrive/Documents/cleaned_ncbi_alfa.csv"
merged_csv_path = "C:/Users/maeso/OneDrive/Documents/merged_allele_uv_data.csv"

# Load the allele-frequency dataset
allele_df = pd.read_csv(allele_csv_path)

# Left merge on the population label: every row of allele_df is kept, and
# populations with no match in uv_df get NaN in the columns coming from uv_df.
merged_df = pd.merge(allele_df, uv_df, on='Population_Name', how='left')

# Save the merged dataset to a new CSV file (index=False leaves the row index out)
merged_df.to_csv(merged_csv_path, index=False)

# Preview the first few rows. This has to be the last expression in the cell:
# a notebook only renders the value of a cell's final expression, so a
# mid-cell `.head()` is silently discarded.
merged_df.head()