In [35]:
import networkx as nx
import toolz
import json
from itertools import chain
from operator import itemgetter

In [29]:
#we load the data.
with open('nflresults.json', 'r',errors='ignore') as nfl:
    results = json.load(nfl)
print(results[:1])

[[{'weight': 234, 'lastName': 'Penny', 'status': 'Active', 'gsisName': '', 'birthDate': '8/17/1993', 'playerId': '00-0032813', 'firstName': 'Elijhaa', 'position': 'RB', 'height': 73, 'uniformNumber': 35, 'team': 'ARI', 'profileId': 2556762, 'yearsPro': 1, 'fullName': 'Elijhaa Penny', 'college': 'Idaho', 'profileUrl': 'http://www.nfl.com/player/elijhaapenny/2556762/profile'}, {'weight': 224, 'lastName': 'Johnson', 'status': 'InjuredReserve', 'gsisName': '', 'birthDate': '12/16/1991', 'playerId': '00-0032187', 'firstName': 'David', 'position': 'RB', 'height': 73, 'uniformNumber': 31, 'team': 'ARI', 'profileId': 2553435, 'yearsPro': 3, 'fullName': 'David Johnson', 'college': 'Northern Iowa', 'profileUrl': 'http://www.nfl.com/player/davidjohnson/2553435/profile'}, {'weight': 331, 'lastName': 'Iupati', 'status': 'InjuredReserve', 'gsisName': 'M.Iupati', 'birthDate': '5/12/1987', 'playerId': '00-0027869', 'firstName': 'Mike', 'position': 'OG', 'height': 77, 'uniformNumber': 76, 'team': 'ARI'

In [30]:
#we create a list of tuples with the edges of our graph, NFL team,
#and College. Looking at the team players, we are going to sort 
#the different origin of the players of every team.
teams=list(chain.from_iterable(results))

teams_hash=toolz.groupby('team', teams)
graph_results=[]
for key, team in teams_hash.items():
    team_name = key
    college_unique=set()
    for college in team:
        if college['college'] not in college_unique:
            graph_results.append(tuple([team_name,college['college']]))
        else:
            college_unique.add(college['college'])
print(graph_results)

[('MIN', 'Notre Dame'), ('MIN', 'Texas'), ('MIN', 'Texas-San Antonio'), ('MIN', 'Michigan State'), ('MIN', 'UCLA'), ('MIN', 'Northern Colorado'), ('MIN', 'Kansas State'), ('MIN', 'Iowa'), ('MIN', 'Houston'), ('MIN', 'Nebraska'), ('MIN', 'Notre Dame'), ('MIN', 'San Diego State'), ('MIN', 'Minnesota State'), ('MIN', 'Connecticut'), ('MIN', 'Cincinnati'), ('MIN', 'Notre Dame'), ('MIN', 'Washington'), ('MIN', 'Michigan Tech'), ('MIN', 'Oklahoma'), ('MIN', 'Clemson'), ('MIN', 'UCLA'), ('MIN', 'Missouri'), ('MIN', 'Boston College'), ('MIN', 'Oregon State'), ('MIN', 'Vanderbilt'), ('MIN', 'Augustana (S.D.)'), ('MIN', 'Rice'), ('MIN', 'Florida State'), ('MIN', 'Portland State'), ('MIN', 'USC'), ('MIN', 'Mississippi'), ('MIN', 'Georgia Southern'), ('MIN', 'Harvard'), ('MIN', 'Iowa'), ('MIN', 'Michigan'), ('MIN', 'Minnesota'), ('MIN', 'Miami (Fla.)'), ('MIN', 'Miami (Fla.)'), ('MIN', 'Kansas State'), ('MIN', 'Texas A&M'), ('MIN', 'East Carolina'), ('MIN', 'Ohio State'), ('MIN', 'Southern Mississ

In [37]:
#we create our graph.
NFLGraph = nx.Graph()
NFLGraph.add_edges_from(graph_results)
print(nx.info(Graph))

Name: 
Type: Graph
Number of nodes: 306
Number of edges: 1554
Average degree:  10.1569


In [48]:
#Degress of Centrality.
results= sorted(nx.degree_centrality(G=NFLGraph).items(),key=itemgetter(1),
             reverse=True)

#Teams with most heterogenous players origin.
teams_centrality = [register for register in results if len(register[0])<=3]
print(teams_centrality[:5])
#SF Giants , Washington Redkings and Detroit Lions.

#Colleges that provide players to more teams.
teams_centrality = [register for register in results if len(register[0])>3]
print(teams_centrality[:5])
#Florida, Ohio State and Miami.


[('SF', 0.18032786885245902), ('WAS', 0.18032786885245902), ('DET', 0.18032786885245902), ('NYJ', 0.17704918032786884), ('SEA', 0.17704918032786884)]
[('Florida', 0.08852459016393442), ('Ohio State', 0.08196721311475409), ('Miami (Fla.)', 0.07868852459016393), ('Alabama', 0.07868852459016393), ('Michigan', 0.06885245901639345)]


In [51]:
#Betweness.
results = sorted(nx.betweenness_centrality(G=NFLGraph).items(),key=itemgetter(1),
             reverse=True)

#Teams with best connected  with different  colleges origin.
teams_betweness = [register for register in results if len(register[0])<=3]
print(teams_betweness[:5])

#Best connected colleges with teams.
college_betweness = [register for register in results if len(register[0])>3]
print(college_betweness[:5])


[('IND', 0.07351727630348392), ('NYJ', 0.0707289434377185), ('NYG', 0.06151476872598788), ('ARI', 0.06020776597717756), ('TB', 0.05954842938138588)]
[('Florida', 0.027593951691461494), ('Ohio State', 0.023360924788243437), ('Miami (Fla.)', 0.019852553139296087), ('Alabama', 0.019600859491233122), ('Penn State', 0.015617110650877964)]


In [52]:
#Clossness.
results= sorted(nx.closeness_centrality(G=NFLGraph).items(),key=itemgetter(1),
             reverse=True)

#Teams with the nearest connection to the different colleges.
teams_clossness = [register for register in results if len(register[0])<=3]
print(teams_clossness[:5])

#Colleges with the nearest connection to all the teams.
colleges_clossness = [register for register in results if len(register[0])>3]
print(colleges_clossness[:5])


[('LSU', 0.4895666131621188), ('USC', 0.48183254344391785), ('SF', 0.3935483870967742), ('WAS', 0.3935483870967742), ('DET', 0.3935483870967742)]
[('Florida', 0.5041322314049587), ('Ohio State', 0.49114331723027377), ('Miami (Fla.)', 0.47433903576982894), ('Alabama', 0.4728682170542636), ('Texas', 0.4572713643178411)]


In [53]:
#we save the graph.
nx.write_gpickle(NFLGraph,'nflgraph.gpickle.1')