In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import linregress
import re

In [None]:
# Load the tweets; the "created_at" column is parsed into datetimes on read.
data = pd.read_csv(
    "dog_rates_tweets.csv",
    parse_dates=["created_at"],
)
def extract_rating(text):
    """Return the numerator of the first "<number>/10" rating found in ``text``.

    Parameters
    ----------
    text : str
        Text to scan. Any non-string value (for example the float NaN that
        pandas uses for a missing cell) is treated as containing no rating.

    Returns
    -------
    float or None
        The integer or decimal number directly preceding the first "/10",
        or ``None`` when no such pattern is present.

    Notes
    -----
    The pattern does not check what follows "/10", so a string such as
    "9/100" is still reported as 9.0.
    """
    # re.search raises TypeError on non-strings, so missing cells are
    # filtered out here instead of crashing the whole .apply() call.
    if not isinstance(text, str):
        return None

    match = re.search(r'(\d+(\.\d+)?)/10', text)
    if match is None:
        return None
    # Group 1 is the full numerator, including any decimal part.
    return float(match.group(1))

In [None]:
# Parse a numeric rating out of every tweet's text.
data['rating'] = data['text'].apply(extract_rating)

# Keep only the rows where a rating was actually found.
has_rating = data['rating'].notna()
data = data[has_rating]



In [None]:
# Ratings above this value are excluded before fitting.
# NOTE(review): presumably these are joke/outlier scores; confirm the cutoff.
MAX_RATING = 25

# .copy() makes the filtered frame independent of the frame it was sliced
# from, so adding columns to it does not raise SettingWithCopyWarning.
data = data[data['rating'] <= MAX_RATING].copy()

# Linear regression needs a numeric x-axis: convert each datetime to a
# POSIX timestamp (seconds as a float).
data['timestamp'] = data['created_at'].apply(lambda x: x.timestamp())


In [None]:
# Least-squares line of rating against time (x = timestamp, y = rating).
timestamps = data['timestamp']
fit = linregress(timestamps, data['rating'])

# Store the fitted line's value at every tweet's timestamp.
data['prediction'] = fit.intercept + fit.slope * timestamps

In [None]:
# Preview the first five rows as plain text to sanity-check the frame.
print(data.head(5))

In [None]:
# Report the fitted line's coefficients, one per line.
for label, value in (("Slope:", fit.slope), ("Intercept:", fit.intercept)):
    print(label, value)

In [None]:
# Scatter of the individual ratings with the fitted trend line drawn on top.
fig, ax = plt.subplots(figsize=(10, 6))
plt.xticks(rotation=25)  # tilt the x tick labels
ax.plot(data['created_at'], data['rating'], 'b.', alpha=0.5)  # one dot per row
ax.plot(data['created_at'], data['prediction'], 'r-', linewidth=3)  # regression line
ax.set_xlabel("Date")
ax.set_ylabel("Rating")
ax.set_title("Dog Ratings Over Time")
plt.show()