In [63]:
import pandas as pd
import numpy as np
import ipaddress
import requests
import math
import time

In [64]:
# Load each unique IP address and their count
ip_addresses = pd.read_csv('IP_Addresses.csv')
ip_addresses

Unnamed: 0,ip_address,count
0,50.91.209.173,2
1,47.156.161.69,5
2,179.9.145.134,8
3,156.34.48.41,2
4,107.130.132.75,2
...,...,...
5427,73.235.48.61,2
5428,90.242.144.253,2
5429,174.20.154.172,9
5430,72.208.37.51,2


In [65]:
#Convert IP address to latitude and longitude coordinates
def get_geolocation(ip):
    url = f'http://ip-api.com/json/{ip}'
    while True:
        response = requests.get(url)
        if response.status_code == 200:
            data = response.json()
            if data['status'] == 'fail':
                print(f"Failed to get data for {ip}: {data['message']}")
                return None
            return (data['lat'], data['lon'])
        elif response.status_code == 429:
            print(f"Rate limit exceeded for IP {ip}. Retrying in 5 seconds...")
            time.sleep(5)  # Wait before retrying
        else:
            print(f"Error: {response.status_code} for IP {ip}")
            return None

In [66]:

def haversine(coord1, coord2):
    R = 6371  # Earth radius in kilometers
    lat1, lon1 = coord1
    lat2, lon2 = coord2

    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)

    a = math.sin(dlat / 2) ** 2 + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(dlon / 2) ** 2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    return R * c  # Distance in kilometers
    
usc_coordinates = (34.0219, -118.4814)
ucla_coordinates = (34.0682, -118.4455)

# Example usage with the location you retrieved
location_trial = get_geolocation(ip_addresses.iloc[0,0])
distance_to_usc = haversine(usc_coordinates, location_trial)
distance_to_ucla = haversine(ucla_coordinates, location_trial)

print(f"Distance to USC: {distance_to_usc:.2f} km")
print(f"Distance to UCLA: {distance_to_ucla:.2f} km")

Distance to USC: 207.08 km
Distance to UCLA: 203.37 km


In [67]:
location_trial = get_geolocation(ip_addresses.iloc[0,0])
location_trial

(35.7768, -119.2414)

In [68]:
ip_addresses["coordinates"] = ip_addresses['ip_address'].apply(func=get_geolocation)
ip_addresses

Rate limit exceeded for IP 76.35.85.139. Retrying in 5 seconds...
Rate limit exceeded for IP 76.35.85.139. Retrying in 5 seconds...
Rate limit exceeded for IP 98.98.195.60. Retrying in 5 seconds...
Rate limit exceeded for IP 98.98.195.60. Retrying in 5 seconds...


KeyboardInterrupt: 

In [18]:
# Define USC and UCLA coordinates
usc_coordinates = (34.0219, -118.4814)
ucla_coordinates = (34.0682, -118.4455)

In [103]:
def is_usc_ip(ip):
    if pd.isna(ip):  # Check for NaN values
        return False
    try:
        ip_obj = ipaddress.ip_address(ip)
        return any(ip_obj in ipaddress.ip_network(range) for range in usc_ranges)
    except ValueError:
        return False  # Handle invalid IP addresses

# Apply the function to the DataFrame
ip_addresses['is_usc'] = ip_addresses['ip_address'].apply(is_usc_ip)

# Count the total number of USC users
usc_user_count = ip_addresses['is_usc'].sum()

print(f"Total USC users: {usc_user_count}")

Total USC users: 0


In [104]:
def is_ucla_ip(ip):
    if pd.isna(ip):  # Check for NaN values
        return False
    try:
        ip_obj = ipaddress.ip_address(ip)
        return any(ip_obj in ipaddress.ip_network(range) for range in ucla_ranges)
    except ValueError:
        return False  # Handle invalid IP addresses

# Apply the function to the DataFrame
ip_addresses['is_ucla'] = ip_addresses['ip_address'].apply(is_usc_ip)

# Count the total number of USC users
usc_user_count = ip_addresses['is_ucla'].sum()

print(f"Total USC users: {usc_user_count}")

Total USC users: 0
