In [None]:
# Load the CSV (path is relative to the notebook's working directory) into `df`.
# The cells below read its 'player', 'year' and 'drive_avg' columns.
import pandas as pd
df = pd.read_csv('pga_scoring_and_drive.csv')
# Bare last expression: the notebook renders the first rows as a preview.
df.head()

In [None]:
# Mean of 'drive_avg' over all rows of each player; the result is a Series
# indexed by the distinct 'player' values.
drive_avg_per_player = df.groupby('player')['drive_avg'].mean()
# Bare last expression so the notebook renders the Series.
drive_avg_per_player

In [None]:
from sklearn.linear_model import LinearRegression
import numpy as np

# For every player, fit a straight line drive_avg ~ year to that player's rows
# and report the first future year in which the fitted line reaches the target
# driving average, or 'Never' if it does not within the forecast horizon.

# Threshold on drive_avg whose first predicted crossing is reported.
TARGET_DRIVE_AVG = 317
# Exclusive upper bound of the years that are searched.
FORECAST_END_YEAR = 2100

# The forecast grid starts the year after the latest year in the whole table
# and is the same for every player, so it is built once outside the loop.
years = np.arange(df['year'].max() + 1, FORECAST_END_YEAR).reshape(-1, 1)

predictions = {}
for player in df['player'].unique():
    # LinearRegression.fit raises on NaN, so keep only rows where both the
    # year and the driving average are present.
    player_data = df.loc[df['player'] == player, ['year', 'drive_avg']].dropna()
    if player_data.empty:
        # Nothing to fit a trend to.
        predictions[player] = 'Never'
        continue

    X = player_data['year'].values.reshape(-1, 1)
    y = player_data['drive_avg'].values
    model = LinearRegression()
    model.fit(X, y)
    pred = model.predict(years)

    # Years whose predicted average meets the target, in ascending order.
    reached = years[pred >= TARGET_DRIVE_AVG, 0]
    # Store a plain int so the dict prints as 2039 rather than a NumPy scalar.
    year_317 = int(reached[0]) if reached.size else 'Never'
    predictions[player] = year_317
predictions

{'Payne Stewart': 'Never',
 'Dan Pohl': 'Never',
 'Lanny Wadkins': 'Never',
 'Paul Azinger': 2039,
 'David Frost': 2054,
 'Bernhard Langer': 2031,
 'Curtis Strange': 'Never',
 'Hal Sutton': 2030,
 'Scott Hoch': 2048,
 "Mark O'Meara": 2055,
 'Steve Pate': 2064,
 'Craig Stadler': 'Never',
 'Tom Kite': 2049,
 'Fred Couples': 2024,
 'Tom Watson': 'Never',
 'Ben Crenshaw': 'Never',
 'Gil Morgan': 'Never',
 'Greg Norman': 'Never',
 'Bobby Wadkins': 'Never',
 'Hale Irwin': 'Never',
 'Corey Pavin': 2099,
 'Mike Reid': 'Never',
 'Richard Zokol': 2069,
 'Larry Mize': 2041,
 'Dave Barr': 'Never',
 'Don Pooley': 'Never',
 'Fuzzy Zoeller': 'Never',
 'Larry Nelson': 2064,
 'John Mahaffey': 'Never',
 'Scott Simpson': 2038,
 'Jack Renner': 'Never',
 'Jay Haas': 2041,
 'Nick Price': 2060,
 'T.C. Chen': 'Never',
 'Mark McCumber': 'Never',
 'Frank Conner': 'Never',
 'John Cook': 2047,
 'Ed Dougherty': 'Never',
 'Raymond Floyd': 2034,
 'Billy Pierot': 'Never',
 'Chip Beck': 'Never',
 'Ed Fiori': 'Never',


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

# For every player, record one of:
#   * the first season in which a driving average of at least
#     TARGET_DRIVE_AVG was observed,
#   * 'Never' when the player has fewer than two usable rows or never reached
#     the target in the data,
#   * 'Unpredictable' when a random forest fitted to the player's history has
#     a hold-out mean absolute error above MAX_HOLDOUT_MAE.

# Threshold on drive_avg.
TARGET_DRIVE_AVG = 317
# Largest hold-out mean absolute error for which a player's result is reported.
MAX_HOLDOUT_MAE = 10

predicted_years = {}

for player in df['player'].unique():
    # Select and drop NaNs in one chain so a fresh frame is produced; calling
    # dropna(inplace=True) on a slice of df can raise SettingWithCopyWarning.
    player_data = df.loc[df['player'] == player, ['year', 'drive_avg']].dropna()

    # A random forest's prediction is an average of training targets, so it can
    # never exceed the player's best observed value. A player who has not
    # reached the target in the data therefore cannot be forecast to reach it.
    if len(player_data) < 2 or player_data['drive_avg'].max() < TARGET_DRIVE_AVG:
        predicted_years[player] = 'Never'
        continue

    X = player_data['year'].values.reshape(-1, 1)
    y = player_data['drive_avg'].values

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # The model is used only as a quality gate on the player's history.
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)

    if mae > MAX_HOLDOUT_MAE:
        predicted_years[player] = 'Unpredictable'
        continue

    # Every player reaching this point has at least one observed season at or
    # above the target (guard above), so report the first such season. The
    # previous forward-stepping loop started from the observed maximum and so
    # never ran, always yielding the player's last season instead.
    reached = player_data.loc[player_data['drive_avg'] >= TARGET_DRIVE_AVG, 'year']
    predicted_years[player] = int(reached.min())

predicted_years

{'Payne Stewart': 'Never',
 'Dan Pohl': 'Never',
 'Lanny Wadkins': 'Never',
 'Paul Azinger': 'Never',
 'David Frost': 'Never',
 'Bernhard Langer': 'Never',
 'Curtis Strange': 'Never',
 'Hal Sutton': 'Never',
 'Scott Hoch': 'Never',
 "Mark O'Meara": 'Never',
 'Steve Pate': 'Never',
 'Craig Stadler': 'Never',
 'Tom Kite': 'Never',
 'Fred Couples': 'Never',
 'Tom Watson': 'Never',
 'Ben Crenshaw': 'Never',
 'Gil Morgan': 'Never',
 'Greg Norman': 'Never',
 'Bobby Wadkins': 'Never',
 'Hale Irwin': 'Never',
 'Corey Pavin': 'Never',
 'Mike Reid': 'Never',
 'Richard Zokol': 'Never',
 'Larry Mize': 'Never',
 'Dave Barr': 'Never',
 'Don Pooley': 'Never',
 'Fuzzy Zoeller': 'Never',
 'Larry Nelson': 'Never',
 'John Mahaffey': 'Never',
 'Scott Simpson': 'Never',
 'Jack Renner': 'Never',
 'Jay Haas': 'Never',
 'Nick Price': 'Never',
 'T.C. Chen': 'Never',
 'Mark McCumber': 'Never',
 'Frank Conner': 'Never',
 'John Cook': 'Never',
 'Ed Dougherty': 'Never',
 'Raymond Floyd': 'Never',
 'Billy Pierot': 