In [None]:
# pip install psycopg2

In [1]:
import time
from pathlib import Path
from urllib.parse import quote

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psycopg2
from sklearn.linear_model import LinearRegression
from sqlalchemy import create_engine

from config import db_password

In [None]:
# Build the connection URI. The password is percent-encoded so characters such
# as "@", ":", "/" or "#" cannot break URI parsing, and the "postgresql://"
# scheme is used because SQLAlchemy 1.4+ no longer accepts "postgres://"
# (libpq/psycopg2 accept both spellings).
credentials = f"postgresql://postgres:{quote(db_password, safe='')}@127.0.0.1:5432/video_game"

In [None]:
# Smoke-test the credentials, then close the probe connection immediately so it
# is not left open for the rest of the kernel session.
psycopg2.connect(credentials).close()

In [None]:
# Connection parameters, yours will be different
# params_dic = {
#     "host"      : "localhost",
#     "database"  : "video_game",
#     "port"      : 5432,
#     "user"      : "postgres",
#     "password"  : db_password
# }
def connect(credentials):
    """Open a connection to the PostgreSQL database server.

    Parameters
    ----------
    credentials : str
        Connection string handed unchanged to ``psycopg2.connect``.

    Returns
    -------
    connection or None
        The open psycopg2 connection, or ``None`` when the attempt failed
        (the error is printed instead of being raised).
    """
    print('Connecting to the PostgreSQL database...')
    try:
        conn = psycopg2.connect(credentials)
    except Exception as error:
        # psycopg2.DatabaseError derives from Exception, so this single clause
        # covers everything the original handler caught.
        print(error)
        return None
    # Only reached when the connection was actually established.
    print("Connection successful")
    return conn

In [None]:
# Run a SELECT on an open database connection and load the rows into a pandas DataFrame
def postgresql_to_dataframe(conn, select_query, column_names):
    """Execute a SELECT query and return its result as a pandas DataFrame.

    Parameters
    ----------
    conn : DB-API connection
        Open connection; must provide ``cursor()`` and ``rollback()``.
    select_query : str
        SQL text passed to ``cursor.execute``.
    column_names : list of str
        Column labels for the resulting DataFrame, in result-column order.

    Returns
    -------
    pandas.DataFrame or int
        The fetched rows as a DataFrame. If executing the query fails, the
        error is printed and the integer ``1`` is returned instead.
    """
    cursor = conn.cursor()
    try:
        cursor.execute(select_query)
    except Exception as error:
        # psycopg2.DatabaseError derives from Exception, so this clause covers
        # everything the original handler caught.
        print("Error: %s" % error)
        # A failed statement leaves the transaction aborted; roll back so the
        # connection can be used for further queries.
        conn.rollback()
        return 1
    else:
        # fetchall() yields a list of row tuples.
        rows = cursor.fetchall()
    finally:
        # Release the cursor on every path, including a failing fetchall().
        cursor.close()

    return pd.DataFrame(rows, columns=column_names)

In [None]:
# Open the connection, then pull the whole video_game_data table into a DataFrame
conn = connect(credentials)
column_names = [
    "Rank", "Name", "Platform", "Year", "Genre", "Publisher",
    "NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales", "Global_Sales",
]
df = postgresql_to_dataframe(
    conn,
    select_query="select * from video_game_data",
    column_names=column_names,
)
# Preview the first rows
df.head()

FileNotFoundError: [Errno 2] No such file or directory: 'vgsales_user_score'

In [None]:
# Keep only the rows whose Year does not contain the "N/A" placeholder
year_is_known = df["Year"].str.contains("N/A").eq(False)
df = df.loc[year_is_known]

In [None]:
# Keep only the rows whose Publisher does not contain the "N/A" placeholder.
# The explicit .copy() makes df an independent frame, so the column
# assignments in later cells do not raise SettingWithCopyWarning.
df = df[df["Publisher"].str.contains("N/A").eq(False)].copy()

In [None]:
# Count the missing values in each column
df.isna().sum()

In [None]:
# Cast the Year column to integers
df["Year"] = df["Year"].astype(int)

In [None]:
# Display the current contents of df
df

In [None]:
# Parse the Year column as datetimes, reading each value as a four-digit year
df["Year"] = pd.to_datetime(df["Year"], format="%Y")

In [None]:
# Summarise df: column dtypes, non-null counts and memory usage
df.info()

In [None]:
# Scatter of the model feature (NA_Sales) against the regression target (Global_Sales)
fig, ax = plt.subplots()
ax.scatter(df["NA_Sales"], df["Global_Sales"])
ax.set_xlabel('NA Sales')
ax.set_ylabel('Global Sales')
plt.show()

In [None]:
# scikit-learn expects a 2-D feature array, so turn NA_Sales into a single-column matrix
X = df["NA_Sales"].to_numpy().reshape(-1, 1)

In [None]:
#X = df[["Name", "Platform", "Year", "Genre", "Publisher", "NA_Sales", "EU_Sales", "JP_Sales", "Other_Sales", "Global_Sales"]]

In [None]:
# Peek at the first five rows of the feature array X
X[:5]

In [None]:
# Check the shape of X; it should be (n_rows, 1) because X was built with reshape(-1, 1)
X.shape

In [None]:
# Regression target: the Global_Sales column
y = df["Global_Sales"]

In [None]:
# Create a scikit-learn LinearRegression estimator with its default settings
model = LinearRegression()

In [None]:
# Train the model: fit a line that maps X (NA_Sales) to y (Global_Sales)
model.fit(X, y)

In [None]:
# Predict Global_Sales for the same X the model was fitted on (in-sample
# predictions), then print the shape of the prediction array
y_pred = model.predict(X)
print(y_pred.shape)

In [None]:
# Overlay the fitted regression line (red) on the observed NA vs. global sales.
# Axis labels match the earlier scatter plot so the figure can be read on its own.
plt.scatter(X, y)
plt.plot(X, y_pred, color='red')
plt.xlabel('NA Sales')
plt.ylabel('Global Sales')
plt.show()

In [None]:
# Inspect the fitted parameters: model.coef_ holds the slope and
# model.intercept_ holds the y-intercept of the regression line
print(model.coef_)
print(model.intercept_)

In [None]:
# Print the first ten target values ("Labels", taken from y) and the first ten
# feature rows ("Data", taken from X)
print(f"Labels: {y[:10]}")
print(f"Data: {X[:10]}")