My pseudocode for getting betting data for one season - 

For each team:

    For each game:
    
        1. Get date and opposing team
        
        2. Get list of games already in CSV which occurred on that date
        
        3. If no such game exists,
        
            add this game to the list
        
        4. Else
            
            a. Check whether this particular game already exists in the CSV
            
            If no, add this game to list
            
            If yes, 
            
                if current team is home team, populate homeLine column.
                if current team is away team, populate awayLine column

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import os
import shutil
import unicodedata
import mlbgame
import glob

In [293]:
# Team abbreviation -> numeric team id that scrape_info_for_team inserts
# into the covers.com past-results URL.
teams = dict(
    CLE=2980,
    CHW=2974,
)

In [294]:
# Create test.csv with the expected header and one placeholder row, so that
# later cells can read the file back with pd.read_csv.
csv_columns = [
    'date', 'homeTeam', 'awayTeam', 'homeScore', 'awayScore',
    'homePitcher', 'awayPitcher', 'homeLine', 'awayLine',
]
placeholder_values = ['11/11/1111', 'Hi', 'Hi', 0, 0, 'Hi', 'Hi', 0, 0]

# One dict per row; keys follow the same order as the CSV columns.
rows_list = [dict(zip(csv_columns, placeholder_values))]
print (rows_list)

f = pd.DataFrame(rows_list)
f.to_csv('test.csv', index=None, columns=csv_columns)

[{'homeLine': 0, 'homePitcher': 'Hi', 'awayTeam': 'Hi', 'date': '11/11/1111', 'awayScore': 0, 'homeScore': 0, 'homeTeam': 'Hi', 'awayLine': 0, 'awayPitcher': 'Hi'}]


In [295]:
def get_current_home_away_teams(team, opposing_team):
    """Decide which side is home and which is away for a single game.

    An opponent string that begins with "@ " means `team` was the visitor:
    the opponent (with that two-character prefix removed) is the home side.
    Any other opponent string means `team` was the home side and the
    opponent is returned unchanged as the away side.

    Returns a ``(home, away)`` tuple.
    """
    if opposing_team.startswith("@ "):
        return opposing_team[2:], team
    return team, opposing_team

In [296]:
def get_scores(scores, isCurrentTeamHome):
    """Split a scraped result string into (home_score, away_score).

    `scores` is stripped and NFKD-normalised, then the second
    space-separated token is split on "-". The first number is treated as
    the current team's score and the second as the opponent's; they are
    assigned to home/away according to `isCurrentTeamHome`.

    Both scores are returned as strings, exactly as they appear in the text.
    Raises IndexError when the text has no second token or no "-" in it.
    """
    normalized = unicodedata.normalize("NFKD", scores.strip())
    score_token = normalized.split(" ")[1]
    parts = score_token.split("-")
    own_score, opponent_score = parts[0], parts[1]

    if isCurrentTeamHome:
        return own_score, opponent_score
    return opponent_score, own_score

In [297]:
def get_moneylines(moneyline, isCurrentTeamHome):
    """Place the current team's moneyline on the correct side of the game.

    Returns ``(homeline, awayline)``. The current team's side receives
    `moneyline` unchanged; the other side is filled with the placeholder 0.
    """
    return (moneyline, 0) if isCurrentTeamHome else (0, moneyline)

In [298]:
def scrape_info_for_team(team, max_games=15):
    """Merge one team's scraped 2016 results into the games table.

    Reads the existing rows from 'test.csv', downloads the team's
    past-results page from covers.com and, for every scraped game, either

    * queues a new row when the table has no row with the same date, home
      team and away team, or
    * fills in the current team's side of the moneyline (``homeLine`` or
      ``awayLine``) on the row(s) that already exist.

    Parameters
    ----------
    team : str
        Key into the module-level ``teams`` dict (e.g. ``'CHW'``).
    max_games : int or None, optional
        Only the first ``max_games`` table rows of the page are processed.
        Defaults to 15, the limit that used to be hard-coded; pass ``None``
        to process every row.

    Returns
    -------
    pandas.DataFrame
        The table read from 'test.csv' with moneylines updated in place and
        newly found games appended. The CSV itself is not written here.

    Raises
    ------
    KeyError
        If ``team`` is not a key of ``teams``.
    requests.HTTPError
        If the results page answers with an HTTP error status.
    """
    # first reading existing csv
    df = pd.read_csv('test.csv')
    # Scraped moneylines are strings, while read_csv may have inferred a
    # numeric dtype for these columns. Using object dtype lets the scraped
    # value be written into an existing row as-is.
    df = df.astype({'homeLine': object, 'awayLine': object})

    # get webpage for current team
    query = ('https://www.covers.com/pageLoader/pageLoader.aspx'
             '?page=/data/mlb/teams/pastresults/2016/team'
             + str(teams[team]) + '.html')
    r = requests.get(query)
    # Fail loudly instead of parsing an error page into zero games.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")

    # getting all games played by team (every row except the header rows)
    games = soup.find_all("tr", class_=lambda x: x != 'datahead')
    games_to_be_added = []

    for game in games[:max_games]:
        cells = game.find_all(class_='datacell')
        if len(cells) < 6:
            # Not a complete game row (cells 0..5 are read below); skip it
            # rather than fail with an IndexError.
            continue

        # get date (first check for existence of game in database)
        current_date = unicodedata.normalize("NFKD", (cells[0].text).strip())

        # get teams (second check)
        opposing_team = (cells[1].text).strip()
        current_home_team, current_away_team = get_current_home_away_teams(
            team, opposing_team)
        is_current_team_home = (current_home_team == team)

        # get scores
        current_home_score, current_away_score = get_scores(
            cells[2].text, is_current_team_home)

        # get pitchers: prefer the link text when the cell wraps the name
        # in an <a> tag, otherwise fall back to the cell's own text
        away_cell, home_cell = cells[3], cells[4]
        current_away_pitcher = (
            away_cell.a.text if away_cell.a else away_cell.text).strip()
        current_home_pitcher = (
            home_cell.a.text if home_cell.a else home_cell.text).strip()

        # get line for current team and current game; the first two
        # characters of the cell text are dropped before the line itself
        current_home_line, current_away_line = get_moneylines(
            (cells[5].text).strip()[2:], is_current_team_home)

        # rows already in the CSV for this exact date and pair of teams
        # NOTE(review): both games of a doubleheader share this key, so each
        # of them would receive the same line here - confirm whether the
        # score needs to be part of the match.
        same_game = ((df['date'] == current_date)
                     & (df['homeTeam'] == current_home_team)
                     & (df['awayTeam'] == current_away_team))

        if same_game.any():
            # Game is already in the table: only the current team's side of
            # the moneyline is known from this page, so fill in that column
            # and leave the other side untouched.
            if is_current_team_home:
                df.loc[same_game, 'homeLine'] = current_home_line
            else:
                df.loc[same_game, 'awayLine'] = current_away_line
        else:
            # Game is not in the table yet: queue it as a new row.
            games_to_be_added.append({
                'date': current_date,
                'homeTeam': current_home_team,
                'awayTeam': current_away_team,
                'homeScore': current_home_score,
                'awayScore': current_away_score,
                'homePitcher': current_home_pitcher,
                'awayPitcher': current_away_pitcher,
                'homeLine': current_home_line,
                'awayLine': current_away_line,
            })

    if games_to_be_added:
        # DataFrame.append was removed in pandas 2.0; concat is the
        # supported way to add the new rows.
        df = pd.concat([df, pd.DataFrame(games_to_be_added)],
                       ignore_index=True)
    return df
        

In [300]:
# Scrape the games for 'CHW' and write the merged table back to test.csv,
# keeping the column order used when the file was created.
output_columns = [
    'date', 'homeTeam', 'awayTeam', 'homeScore', 'awayScore',
    'homePitcher', 'awayPitcher', 'homeLine', 'awayLine',
]
x = scrape_info_for_team('CHW')
x.to_csv('test.csv', index=None, columns=output_columns)