In [1]:
import pathlib
data_dir = pathlib.Path().resolve().parent/"data"

In [2]:
import sys

print(sys.executable)

c:\Users\egor\AppData\Local\pypoetry\Cache\virtualenvs\leadapiappfinal-yRkPzyXw-py3.11\Scripts\python.exe


In [3]:
import swisseph as swe
import sys

swe.set_ephe_path(str(data_dir/'ephemeris'))


In [11]:
bodies = {
    'Sun': swe.SUN,
    'Moon': swe.MOON,
    'Mercury': swe.MERCURY,
    'Venus': swe.VENUS,
    'Mars': swe.MARS,
    'Jupiter': swe.JUPITER,
    'Saturn': swe.SATURN,
    'Uranus': swe.URANUS,
    'Neptune': swe.NEPTUNE,
    'Pluto': swe.PLUTO,
    'Lilith': swe.MEAN_APOG,
    'Ascending node': swe.TRUE_NODE,
    'Descending node': swe.MEAN_NODE
}

def get_zodiac_sign(longitude):
    signs = [
    "Aries", "Taurus", "Gemini", "Cancer", "Leo", "Virgo",
    "Libra", "Scorpio", "Sagittarius", "Capricorn", "Aquarius", "Pisces"
]
    index = int(longitude // 30) % 12
    return signs[index]


In [16]:
jd = swe.julday(2000, 4, 7)

for body in bodies:
    longititude = swe.calc_ut(jd, bodies[body])[0][0]

    sign = get_zodiac_sign(longititude)

    degrees = int(longititude)
    minutes = int((longititude - degrees) * 60)
    seconds = int(((longititude - degrees) * 60 - minutes) * 60)

    print(f"{body}: {degrees}° {minutes}' {seconds}'' в {sign}")

Sun: 17° 58' 20'' в Aries
Moon: 54° 2' 33'' в Taurus
Mercury: 352° 5' 57'' в Pisces
Venus: 0° 53' 36'' в Aries
Mars: 41° 15' 30'' в Taurus
Jupiter: 40° 39' 16'' в Taurus
Saturn: 46° 15' 29'' в Taurus
Uranus: 319° 54' 40'' в Aquarius
Neptune: 306° 18' 41'' в Aquarius
Pluto: 252° 45' 22'' в Sagittarius
Lilith: 274° 14' 43'' в Capricorn
Ascending node: 120° 10' 27'' в Leo
Descending node: 119° 54' 12'' в Cancer


In [23]:
import pandas as pd

athlete_bio = pd.read_csv(data_dir/'olympic/Olympic_Athlete_Bio.csv')
athlete_event_results = pd.read_csv(data_dir/'olympic/Olympic_Athlete_Event_Results.csv')
medal_tally = pd.read_csv(data_dir/'olympic/Olympic_Games_Medal_Tally.csv')
results = pd.read_csv(data_dir/'olympic/Olympic_Results.csv')
country = pd.read_csv(data_dir/'olympic/Olympics_Country.csv')

In [55]:
import re

merged_data = pd.merge(athlete_event_results, athlete_bio, on='athlete_id')
merged_data = pd.merge(merged_data, medal_tally, on='edition_id')
merged_data = pd.merge(merged_data, results, on='result_id')

team_sports = merged_data[merged_data['isTeamSport'] == True]

In [56]:
from datetime import datetime

def born_to_cosmo(born):
    longtitudes = []
    for body in bodies:
        longititude = swe.calc_ut(swe.julday(born.year, born.month, born.day), bodies[body])[0][0]
        longtitudes.append(longititude)

    return longtitudes


In [57]:
team_sports = team_sports.dropna(subset=['born'])
date_pattern = re.compile(r'^\d{1,2} \w+ \d{4}$')
team_sports = team_sports[team_sports['born'].apply(lambda x: bool(date_pattern.match(x)))]

team_sports['born'] = pd.to_datetime(team_sports['born'], format='%d %B %Y')

additional_features = team_sports['born'].apply(born_to_cosmo)
feature_names = ['Sun', 'Moon', 'Mercury', 'Venus', 'Mars', 'Jupiter', 'Saturn', 'Uranus', 'Neptune', 'Pluto', 'Lilith', 'Ascending node', 'Descending node']

for i, feature_name in enumerate(feature_names):
    team_sports[feature_name] = additional_features.apply(lambda x: x[i])

team_sports.columns

Index(['edition_x', 'edition_id_x', 'country_noc_x', 'sport_x', 'event',
       'result_id', 'athlete', 'athlete_id', 'pos', 'medal', 'isTeamSport',
       'name', 'sex', 'born', 'height', 'weight', 'country_x', 'country_noc_y',
       'description', 'special_notes', 'edition_y', 'year', 'country_y',
       'country_noc', 'gold', 'silver', 'bronze', 'total', 'event_title',
       'edition', 'edition_id_y', 'sport_y', 'sport_url', 'result_date',
       'result_location', 'result_participants', 'result_format',
       'result_detail', 'result_description', 'Sun', 'Moon', 'Mercury',
       'Venus', 'Mars', 'Jupiter', 'Saturn', 'Uranus', 'Neptune', 'Pluto',
       'Lilith', 'Ascending node', 'Descending node'],
      dtype='object')

In [62]:
grouped_data = team_sports.groupby(['edition_id_x', 'country_noc', 'event', 'medal']).agg({
    'name': list,
    'born': list,
    'pos': 'first',
    'Sun': list,
    'Moon': list,
    'Mercury': list,
    'Venus': list,
    'Mars': list,
    'Jupiter': list,
    'Saturn': list,
    'Uranus': list,
    'Neptune': list,
    'Pluto': list,
    'Lilith': list,
    'Ascending node': list,
    'Descending node': list
}).reset_index()

grouped_data['won'] = grouped_data['medal'] == 'Gold'


In [63]:
preprocessed_data = grouped_data[['Sun','Moon', 'Mercury', 'Venus', 'Mars', 'Jupiter', 'Saturn', 'Uranus', 'Neptune', 'Pluto', 'Lilith', 'Ascending node', 'Descending node', 'won']]
preprocessed_data.head

<bound method NDFrame.head of                                                       Sun  \
0       [223.17501312036723, 64.51883847705251, 223.17...   
1       [173.39075886455345, 9.150414907170621, 173.39...   
2                [197.49009132410916, 27.330589047968722]   
3       [51.75219369810602, 151.0717626682138, 190.301...   
4                                    [164.01487755386822]   
...                                                   ...   
239417            [41.82619769990717, 141.67314053412147]   
239418           [201.87433560062715, 322.53437290669103]   
239419             [197.3577153181781, 97.71426008722217]   
239420           [240.98672480465387, 155.21381775977991]   
239421             [150.10657180781533, 37.9695046580853]   

                                                     Moon  \
0       [54.43503142165456, 279.45355900765236, 54.435...   
1       [66.1996458356739, 56.913773543683426, 66.1996...   
2                [295.67626153118147, 210.851671908579

In [64]:
preprocessed_data.to_csv(data_dir/'olympic/preprocessed_data.csv', index=False)