Permalink
Browse files

[FIX]

  • Loading branch information...
1 parent a49a2fc commit 1c66c023d4a05df03715bfa810545289077e163b Elad Meidar committed Jun 28, 2014
View
@@ -0,0 +1,2 @@
+--color
+--format documentation
@@ -13,7 +13,8 @@ module Transfermarkt
USER_AGENT = "Firefox"
def Transfermarkt.base_uri
- "http://pipeline.bascout.com/"
+ #{}"http://pipeline.bascout.com/"
+ "http://transfermarkt.co.uk"
end
def self.test_fetch_league
@@ -17,9 +17,9 @@ def self.fetch_by_club_uri(club_uri, fetch_players = false)
options = {}
options[:club_uri] = club_uri
- options[:name] = club_html.xpath('//*[@id="vereinsinfo"]').text
- options[:country] = club_html.xpath('//*[@id="centerbig"]//form//table//tr[1]//td[2]//h1//a[2]').text
- options[:player_uris] = club_html.xpath('//table[@id="spieler"]//tr//td//table//tr//td[2]//a[contains(@href,"profil")]').collect{|player_html| player_html["href"]}
+ options[:name] = club_html.xpath('//*[@class="spielername-profil"]').text.strip
+ options[:country] = club_html.xpath('//*[@id="land_select_breadcrumb"]//option[@selected="selected"]').text.strip
+ options[:player_uris] = club_html.xpath('//*[@id="yw1"]//table//tr//td[2]//a[contains(@href,"profil")]').collect{|player_html| player_html["href"]}
puts "found #{options[:player_uris].count} players"
options[:players] = []
@@ -17,8 +17,17 @@ class Player < Transfermarkt::EntityBase
def initialize(options = {})
super
- self.market_value = self.market_value.to_s.gsub(".", "").to_i
+
+ encoding_options = {
+ :invalid => :replace, # Replace invalid byte sequences
+ :undef => :replace, # Replace anything not defined in ASCII
+ :replace => '', # Use a blank for those replacements
+ :universal_newline => true # Always break lines with \n
+ }
+ self.age = self.age.to_i
+ self.market_value = self.market_value.to_s.gsub(",", "").to_i
self.height = self.height.to_s.gsub(",", "").to_i
+ self.nationality = self.nationality.to_s.encode(Encoding.find('ASCII'), encoding_options).split("\n").collect(&:strip)
end
def self.fetch_by_profile_uri(profile_uri = "")
@@ -32,54 +41,54 @@ def self.fetch_by_profile_uri(profile_uri = "")
options = {}
options[:profile_uri] = profile_uri
- options[:club] = profile_html.xpath('//*[@id="centerbig"]//div[1]//div//table//tr[2]//td//a[1]').text
- options[:full_name] = profile_html.xpath('//*[@id="centerbig"]//div[1]//div//table//tr[1]//td[2]//h1').text.gsub(/[\d]/, "").strip
- options[:picture] = profile_html.xpath('//*[@id="centerbig"]//div[1]//table//tr//td[1]//img')[1]["src"]
- options[:name_in_native_country] = profile_html.xpath("//*[@class='given-name s10']").text
- headers = profile_html.xpath('//*[@id="centerbig"]//div[1]//table//tr//td[2]//table//tr//td[1]').collect(&:text)
- headers = headers.collect {|header| header.downcase.gsub(":", "").gsub(" ", "_").gsub("'s", "").to_sym}
+ # //*[@id="main"]/div[7]/div/div/div[2]/div[2]/div[2]/table/tbody/tr[2]/td/a
+ options[:club] = profile_html.xpath('//*[@id="main"]//div[7]//table//tr[2]//td//a').text
+ options[:position] = profile_html.xpath('//*[@id="main"]//div[7]//table[1]//tr[3]//td[1]')[1].text.strip
+ options[:full_name] = profile_html.xpath('//*[@class="spielername-profil"]').text.gsub(/[\d]/, "").strip
+
+ options[:picture] = profile_html.xpath('//*[@id="main"]//div[7]//div//div//div[2]//div[1]//img')[0]["src"]
+ options[:name_in_native_country] = profile_html.xpath('//*[@id="main"]//div[9]//div[1]//div[2]//div[2]//div[1]//div//table//tr[1]//td[1]')[0].text
- values = profile_html.xpath('//*[@id="centerbig"]//div[1]//table//tr//td[2]//table//tr//td[2]').collect(&:text)
- values = values.collect {|value| value.strip.match(/[A-Za-z0-9,. -]*/)[0] }
+ options[:market_value] = profile_html.xpath('//*[@id="main"]//div[7]//div//div//div[2]//div[3]//span//a').text.gsub(",", ".")
+
+ if options[:market_value].include?("Mil")
+ options[:market_value] = options[:market_value].to_f * 1_000_000
+ else
+ options[:market_value] = options[:market_value].to_f * 100_000
+ end
+ info_values = profile_html.xpath('//*[@id="main"]//div[9]//div[1]//div[2]//div[2]//div[1]//div//table//tr//td').collect(&:text).collect(&:strip)
+ info_headers = [:name_in_native_country, :date_of_birth, :place_of_birth, :age, :height, :nationality, :position, :foot]
+
+ player_info = Hash[info_headers.zip(info_values.slice(0..info_headers.size))]
+
# get player performance
options[:performance_data] = {}
- performance_uri = profile_uri.gsub("profil", "leistungsdaten")
+ performance_uri = profile_uri.gsub("profil", "leistungsdaten") + "/saison/"
- options = Hash[headers.zip(values)].merge(options)
+ years = (Time.now.year - 6..Time.now.year - 1).to_a
- # If there is a performance data blcok
- if profile_html.xpath('//*[@id="centerbig"]/div[4]/p[3]/a').any?
-
- perforamnce_types = []
- 10.times do |i|
- perforamnce_types << (Time.now.year - i).to_s
- end
-
- perforamnce_types.each do |type|
- performance_with_type_uri = ""
- if type == "All"
- performance_with_type_uri = performance_uri.gsub(".html", "_gesamt.html")
- else
- performance_with_type_uri = performance_uri.gsub(".html", "_#{type}.html")
- end
+ years.each do |year|
+ goalkeeper = options[:position] == "Goalkeeper"
+ options[:performance_data][year.to_s] = self.fetch_performance_data(performance_uri + year.to_s, goalkeeper)
+ end
- goalkeeper = options[:position] == "Goalkeeper"
- options[:performance_data][type] = self.fetch_performance_data(performance_with_type_uri, goalkeeper)
- end
- end
+ # Get injury data
- options[:injuries_data] = self.fetch_injuries_data(profile_html)
+ injury_uri = profile_uri.gsub("profil", "verletzungen")
+
+ options[:injuries_data] = self.fetch_injuries_data(injury_uri)
puts "fetched player #{options[:full_name]}"
- self.new(options)
+ self.new(player_info.merge(options))
end
end
private
def self.fetch_performance_data(performance_uri, is_goalkeeper = false)
+ puts "Fetching Performance page for #{performance_uri}"
req = self.get("/#{performance_uri}", headers: {"User-Agent" => Transfermarkt::USER_AGENT})
if req.code != 200
nil
@@ -89,31 +98,42 @@ def self.fetch_performance_data(performance_uri, is_goalkeeper = false)
performance_headers = if is_goalkeeper
[:competition, :matches, :goals, :own_goals, :assists, :yellow_cards, :second_yellows, :red_cards, :substituted_in, :substituted_out , :goals_conceded, :saves, :minutes]
else
- [:competition, :matches, :goals, :own_goals, :assists, :yellow_cards, :second_yellows, :red_cards, :substituted_in, :substituted_out, :minutes_per_goal, :minutes]
+ [:competition, :matches, :goals, :assists, :yellow_cards, :second_yellows, :red_cards, :minutes]
end
- performance_html.xpath('//table[@class="standard_tabelle"][1]//tr[position()>1]').each do |competition|
+ performance_html.xpath('//*[@id="yw2"]//table//tr[position()>1]').each do |competition|
values = Nokogiri::HTML::DocumentFragment.parse(competition.to_html).search("*//td").collect(&:text)
if values.first == ""
values.delete_at 0
end
- performance_data << Hash[performance_headers.zip(values)]
+ competition_performance = Hash[performance_headers.zip(values)]
+ competition_performance[:minutes] = competition_performance[:minutes].gsub(".", "").to_i
+ performance_data << competition_performance
end
- performance_data
end
return performance_data
end
- def self.fetch_injuries_data(player_html)
- injury_data = []
- injuries_headers = [:season, :from, :to, :injury]
-
- player_html.xpath('//*[@id="centerbig"]/div[4]/table[3]//tr[position()>1]').each do |injury_row|
- values = Nokogiri::HTML::DocumentFragment.parse(injury_row.to_html).search("*//td").collect(&:text)
- injury_data << Hash[injuries_headers.zip(values)]
+ def self.fetch_injuries_data(injury_uri)
+ req = self.get("/#{injury_uri}", headers: {"User-Agent" => Transfermarkt::USER_AGENT})
+ if req.code != 200
+ []
+ else
+ injury_data = []
+ player_html = Nokogiri::HTML(req.parsed_response)
+ injuries_headers = [:season, :injury, :from, :to, :days_out, :games_missed]
+
+ player_html.xpath('//*[@id="yw1"]//table//tr[position()>1]').each do |injury_row|
+ values = Nokogiri::HTML::DocumentFragment.parse(injury_row.to_html).search("*//td").collect(&:text)
+ injury_details = Hash[injuries_headers.zip(values)]
+ injury_details[:days_out] = injury_details[:days_out].strip.to_i
+ injury_details[:games_missed] = injury_details[:games_missed].strip.to_i
+ injury_data << injury_details
+ end
+ puts injury_data.inspect
+ injury_data
end
- injury_data
end
end
end
View
@@ -0,0 +1,8 @@
+require 'bundler/setup'
+Bundler.setup
+
+require 'transfermarkt' # and any other gems you need
+require 'fakeweb'
+RSpec.configure do |config|
+ # some (optional) config here
+end
@@ -0,0 +1,124 @@
+
+<!DOCTYPE html>
+<!-- paulirish.com/2008/conditional-stylesheets-vs-css-hacks-answer-neither/ -->
+<!--[if IE 7]> <html class="ie7 oldie" lang="en"> <![endif]-->
+<!--[if IE 8]> <html class="no-js lt-ie9" lang="en"> <![endif]-->
+<!--[if gt IE 8]><!--> <html class="no-js" lang="en"> <!--<![endif]-->
+<head>
+ <meta charset="utf-8" />
+
+ <!-- Set the viewport width to device width for mobile -->
+ <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no" />
+ <title>Transfermarkt - Error</title>
+ <!-- SZM 2.0 (IVW/AGOF) -->
+<script type="text/javascript" src="https://script.ioam.de/iam.js"></script> <script type="text/javascript">
+
+ var _gaq = _gaq || [];
+ _gaq.push(['_setAccount', 'UA-3816204-13']);
+ _gaq.push(['_setDomainName', 'none']);
+ _gaq.push(['_setAllowLinker', true]);
+ _gaq.push(['_trackPageview']);
+ _gaq.push(['_trackPageLoadTime']);
+
+ (function() {
+ var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
+ ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
+ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
+ })();
+
+ </script>
+ <!--[if lte IE 8]>
+ <link rel="stylesheet" type="text/css" href="/css/ie.css" />
+ <![endif]-->
+ </head>
+<body>
+<div id="werbung_superbanner" class="hide-for-small">
+ <div class="werbung werbung-superbanner">
+<!--/* Sports 4.3 - Transfermarkt.de - Transfermarkt.de - 728x90 - 728x90 */-->
+
+<script type='text/javascript' src='http://delivery.ads-creativesyndicator.com/adserver/tag.php?_t=1ef2&_i=4064&_c=INSERT_CLICK_MACRO&_r=INSERT_RANDOM_NUMBER'>
+</script><noscript><iframe id='cc32f1' name='cc32f1' src='http://delivery.ads-creativesyndicator.com/adserver/tag.php?_t=2efe&_i=4064&_c=INSERT_CLICK_MACRO&_r=INSERT_RANDOM_NUMBER' frameborder='0' scrolling='no' width='728' height='90'>
+<a href='http://delivery.ads-creativesyndicator.com/adserver/tag.php?_t=3ef0&_n=cc32f1&_i=4064&_c=INSERT_CLICK_MACRO&_r=INSERT_RANDOM_NUMBER' target='_blank'>
+<img src='http://delivery.ads-creativesyndicator.com/adserver/tag.php?_t=4efc&_n=cc32f1&_i=4064&_c=INSERT_CLICK_MACRO&_r=INSERT_RANDOM_NUMBER' border='0' alt='' /></a>
+</iframe></noscript>
+
+<!--/* Sports 4.3 - Transfermarkt.de - Transfermarkt.de - 970x250 - 970x250 */-->
+
+<script type='text/javascript' src='http://delivery.ads-creativesyndicator.com/adserver/tag.php?_t=1f0e&_i=4065&_c=INSERT_CLICK_MACRO&_r=INSERT_RANDOM_NUMBER'>
+</script><noscript><iframe id='65c671' name='65c671' src='http://delivery.ads-creativesyndicator.com/adserver/tag.php?_t=2f0b&_i=4065&_c=INSERT_CLICK_MACRO&_r=INSERT_RANDOM_NUMBER' frameborder='0' scrolling='no' width='970' height='250'>
+<a href='http://delivery.ads-creativesyndicator.com/adserver/tag.php?_t=3f0c&_n=65c671&_i=4065&_c=INSERT_CLICK_MACRO&_r=INSERT_RANDOM_NUMBER' target='_blank'>
+<img src='http://delivery.ads-creativesyndicator.com/adserver/tag.php?_t=4f09&_n=65c671&_i=4065&_c=INSERT_CLICK_MACRO&_r=INSERT_RANDOM_NUMBER' border='0' alt='' /></a>
+</iframe></noscript>
+</div>
+</div>
+<div id="main">
+ <div class="row hide-on-print">
+ <div id="header" class="twelve columns">
+ <div class="naviback hide-for-small hide-for-print"></div>
+ <div style="position: absolute; top: 0; left: 100%; z-index: 1;" class="hide-for-small">
+ <div class="werbung werbung-skyscraper">
+<!--/* Sports 4.3 - Transfermarkt.de - Transfermarkt.de - 160x600 - 160x600 */-->
+
+<script type='text/javascript' src='http://delivery.ads-creativesyndicator.com/adserver/tag.php?_t=1f10&_i=4066&_c=INSERT_CLICK_MACRO&_r=INSERT_RANDOM_NUMBER'>
+</script><noscript><iframe id='6a8af8' name='6a8af8' src='http://delivery.ads-creativesyndicator.com/adserver/tag.php?_t=2f1c&_i=4066&_c=INSERT_CLICK_MACRO&_r=INSERT_RANDOM_NUMBER' frameborder='0' scrolling='no' width='160' height='600'>
+<a href='http://delivery.ads-creativesyndicator.com/adserver/tag.php?_t=3f1d&_n=6a8af8&_i=4066&_c=INSERT_CLICK_MACRO&_r=INSERT_RANDOM_NUMBER' target='_blank'>
+<img src='http://delivery.ads-creativesyndicator.com/adserver/tag.php?_t=4f1a&_n=6a8af8&_i=4066&_c=INSERT_CLICK_MACRO&_r=INSERT_RANDOM_NUMBER' border='0' alt='' /></a>
+</iframe></noscript>
+</div>
+ </div>
+ <div class="row">
+ <div class="four columns hide-for-small">
+ <a href="/" id="logo-home"><img title="Transfermarkt" alt="Transfermarkt" src="/images/logo.png" /></a>
+ <span id="domain">.CO.UK</span>
+ </div>
+ <div class="two columns header-social-icons hide-for-small">
+<###dynamic-0###> </div>
+ <div class="six columns mobile-four">
+ <form name="schnellsuche" id="schnellsuche" class="noclose" action="/schnellsuche/ergebnis/schnellsuche">
+ <!--<input type="text" name="suchbegriff" class="header-suche" placeholder="Suche nach Namen, Vereinen oder Statistiken..." />-->
+ <input type="text" name="query" class="header-suche" placeholder="Enter search term:" />
+ <input type="image" class="header-suche-abschicken" src="/images/suchicon.png" />
+ </form>
+ </div>
+ <div id="domains" class="noclose">
+ <a href="http://www.transfermarkt.de" class=""><img src="/images/flagge/verysmall/40.png" title="Deutschland" alt="Deutschland" id="" class="flaggenrahmen" /><span>DE</span></a>
+ <a href="http://www.transfermarkt.at" class=""><img src="/images/flagge/verysmall/127.png" title="Deutschland" alt="Deutschland" id="" class="flaggenrahmen" /><span>AT</span></a>
+ <a href="http://www.transfermarkt.ch" class=""><img src="/images/flagge/verysmall/148.png" title="Deutschland" alt="Deutschland" id="" class="flaggenrahmen" /><span>CH</span></a>
+ <a href="http://www.transfermarkt.com.tr" class=""><img src="/images/flagge/verysmall/174.png" title="Deutschland" alt="Deutschland" id="" class="flaggenrahmen" /><span>COM.TR</span></a>
+ <a href="http://www.transfermarkt.it" class=""><img src="/images/flagge/verysmall/75.png" title="Deutschland" alt="Deutschland" id="" class="flaggenrahmen" /><span>IT</span></a>
+ <a href="http://www.transfermarkt.pl" class=""><img src="/images/flagge/verysmall/135.png" title="Deutschland" alt="Deutschland" id="" class="flaggenrahmen" /><span>PL</span></a>
+ <a href="http://www.transfermarkt.co.uk" class="aktiv"><img src="/images/flagge/verysmall/189.png" title="Deutschland" alt="Deutschland" id="" class="flaggenrahmen" /><span>CO.UK</span></a>
+ <a href="http://www.transfermarkt.es" class=""><img src="/images/flagge/verysmall/157.png" title="Deutschland" alt="Deutschland" id="" class="flaggenrahmen" /><span>ES</span></a>
+ <a href="http://www.transfermarkt.nl" class=""><img src="/images/flagge/verysmall/122.png" title="Deutschland" alt="Deutschland" id="" class="flaggenrahmen" /><span>NL</span></a>
+ <a href="http://www.transfermarkt.pt" class=""><img src="/images/flagge/verysmall/136.png" title="Deutschland" alt="Deutschland" id="" class="flaggenrahmen" /><span>PT</span></a>
+ </div>
+ </div>
+ </div>
+ </div>
+
+ <div class="row hide-on-print navihalter">
+ <div class="page_wrapper">
+ <div class="twelve columns">
+ <div class="megamenu_container megamenu_dark_bar megamenu_dark">
+ <###dynamic-1###> <a href="/" id="logo_klein" class="show-for-small"><img title="Transfermarkt" alt="Transfermarkt" src="/images/logo.png" /></a>
+ <div id="userprofil-box">
+ <###dynamic-2###> <###dynamic-3###> </div>
+ </div>
+ </div>
+ </div>
+ </div>
+
+ <div class="row popuphalter">
+ <###dynamic-4###> <###dynamic-5###> </div>
+
+ <div style="text-align:right"></div>
+ <div class="row">
+ <div id="breadcrumb" class="twelve columns">
+ <div class="breadcrumb-box">
+ <div class="breadcrumb">
+ <div class="breadcrumb-text home">
+ <a href="/">Home</a>
+ </div>
+<form class="breadcrumb-form breadcrumb-land" action="/en/jumplist/breadcrumb/site" method="post"> <div class="breadcrumb-select alternative-select chzn-land">
+ <h1>CDbException</h1>
+<p>CDbConnection failed to open the DB connection.</p>
Oops, something went wrong.

0 comments on commit 1c66c02

Please sign in to comment.