Permalink
Browse files

- enhanced team detection

- fixed some bugs with the new design
- added pastebin / hastebin support
- extended parsing to pastes that contain multiple teams
  • Loading branch information...
FullLifeGames committed Dec 17, 2017
1 parent 3b3ed0c commit 56fd79270bae4b47fc14294f83af418e741c7dc6
Showing with 212 additions and 22 deletions.
  1. +182 −16 Smogon Team Crawler/Program.cs
  2. +2 −0 Smogon Team Crawler/Team.cs
  3. +28 −6 Transform Output To Importable/Program.cs
@@ -21,6 +21,8 @@ class Program

private static WebClient client;

private static string[] hardCodedBlacklistedPastes = new string[] { "spEtvevT" };

static void Main(string[] args)
{
Dictionary<string, List<Team>> teamsForTiers = new Dictionary<string, List<Team>>();
@@ -113,11 +115,11 @@ static void Main(string[] args)
{
string tempInside = line.Substring(line.IndexOf("data-preview-url") + "data-preview-url".Length);
tempInside = tempInside.Substring(tempInside.IndexOf("\"") + 1);
if (!tempInside.Contains("preview"))
if (!tempInside.Contains("/preview"))
{
continue;
}
tempInside = tempInside.Substring(0, tempInside.IndexOf("preview"));
tempInside = tempInside.Substring(0, tempInside.IndexOf("/preview") + 1);
string url = "http://www.smogon.com" + tempInside;
Console.WriteLine("Currently Scanning: " + url);
int beforeCount = teamsForTiers[kv.Key].Count;
@@ -168,11 +170,11 @@ static void Main(string[] args)
{
string tempInside = line.Substring(line.IndexOf("data-preview-url") + "data-preview-url".Length);
tempInside = tempInside.Substring(tempInside.IndexOf("\"") + 1);
if (!tempInside.Contains("preview"))
if (!tempInside.Contains("/preview"))
{
continue;
}
tempInside = tempInside.Substring(0, tempInside.IndexOf("preview"));
tempInside = tempInside.Substring(0, tempInside.IndexOf("/preview") + 1);
string url = "http://www.smogon.com" + tempInside;
Console.WriteLine("Currently Scanning: " + url);
if (!rmtForTiers.ContainsKey(prefix))
@@ -330,11 +332,13 @@ private static void AnalyzeRMTTopic(string url, string prefix, Dictionary<string

bool timerHeader = false;

string lastLine = "";

List<string> currentTeams = new List<string>();

foreach (string line in site.Split('\n'))
{
HandleLine(url, prefix, rmtForTiers, pageCount, ref blockStarted, ref blockText, ref postStarted, ref postLink, ref postLikes, ref postDate, ref postedBy, ref likeStarted, ref timerHeader, currentTeams, line);
HandleLine(url, prefix, rmtForTiers, pageCount, ref blockStarted, ref blockText, ref postStarted, ref postLink, ref postLikes, ref postDate, ref postedBy, ref likeStarted, ref timerHeader, currentTeams, line, ref lastLine);
}
}
}
@@ -382,7 +386,7 @@ private static string GetTeamLineupString(string teamString)
{
mon = mon.Substring(0, mon.IndexOf("@")).Trim();
}
if (mon.Contains("("))
if (mon.Contains("(") && mon.Contains(")"))
{
mon = mon.Substring(mon.IndexOf("(") + 1);
mon = mon.Substring(0, mon.IndexOf(")"));
@@ -430,21 +434,25 @@ private static void AnalyzeTopic(string url, string tier, Dictionary<string, Lis

bool timerHeader = false;

string lastLine = "";

List<string> currentTeams = new List<string>();

foreach (string line in site.Split('\n'))
{
HandleLine(url, tier, teamsForTiers, pageCount, ref blockStarted, ref blockText, ref postStarted, ref postLink, ref postLikes, ref postDate, ref postedBy, ref likeStarted, ref timerHeader, currentTeams, line);
HandleLine(url, tier, teamsForTiers, pageCount, ref blockStarted, ref blockText, ref postStarted, ref postLink, ref postLikes, ref postDate, ref postedBy, ref likeStarted, ref timerHeader, currentTeams, line, ref lastLine);
}
}
}
catch (WebException)
catch (WebException e)
{
Console.WriteLine("WebException bei: " + url);
Console.WriteLine(e.Message);
Console.ReadLine();
}
}

private static void HandleLine(string url, string tier, Dictionary<string, List<Team>> teamsForTiers, int pageCount, ref bool blockStarted, ref string blockText, ref bool postStarted, ref string postLink, ref int postLikes, ref DateTime postDate, ref string postedBy, ref bool likeStarted, ref bool timerHeader, List<string> currentTeams, string line)
private static void HandleLine(string url, string tier, Dictionary<string, List<Team>> teamsForTiers, int pageCount, ref bool blockStarted, ref string blockText, ref bool postStarted, ref string postLink, ref int postLikes, ref DateTime postDate, ref string postedBy, ref bool likeStarted, ref bool timerHeader, List<string> currentTeams, string line, ref string lastLine)
{
if (!postStarted)
{
@@ -470,8 +478,53 @@ private static void HandleLine(string url, string tier, Dictionary<string, List<
postStarted = false;
foreach (string team in currentTeams)
{
Team teamObject = new Team(team, postLikes, postDate, url + "page-" + pageCount + "#" + postLink, postedBy);
teamsForTiers[tier].Add(teamObject);
string tmpTeam = team;
bool moreTeams = false;
while (tmpTeam.Contains("==="))
{
moreTeams = true;
string teamLine = tmpTeam.Substring(tmpTeam.IndexOf("===") + "===".Length);
if (!teamLine.Contains("\n"))
{
break;
}
teamLine = teamLine.Substring(0, teamLine.IndexOf("\n"));

string teamTier = null;
if (teamLine.Contains("[") && teamLine.Contains("]"))
{
teamTier = teamLine.Substring(teamLine.IndexOf("[") + 1, teamLine.IndexOf("]") - teamLine.IndexOf("[") - 1);
teamLine = teamLine.Substring(teamLine.IndexOf("]") + 1);
}

string teamTitle = teamLine.Substring(0, teamLine.IndexOf("===")).Trim();

string fullTempTeam = tmpTeam.Substring(tmpTeam.IndexOf("===") + "===".Length);
fullTempTeam = fullTempTeam.Substring(fullTempTeam.IndexOf("\n") + 1);

if (fullTempTeam.Contains("==="))
{
fullTempTeam = fullTempTeam.Substring(0, fullTempTeam.IndexOf("==="));
}

Team teamObject = new Team(fullTempTeam, postLikes, postDate, url + "page-" + pageCount + "#" + postLink, postedBy);
teamObject.TeamTier = teamTier;
teamObject.TeamTitle = teamTitle;
teamsForTiers[tier].Add(teamObject);

tmpTeam = tmpTeam.Substring(tmpTeam.IndexOf("===") + "===".Length);
tmpTeam = tmpTeam.Substring(tmpTeam.IndexOf("\n") + 1);

if (tmpTeam.Contains("==="))
{
tmpTeam = tmpTeam.Substring(tmpTeam.IndexOf("==="));
}
}
if (!moreTeams)
{
Team teamObject = new Team(team, postLikes, postDate, url + "page-" + pageCount + "#" + postLink, postedBy);
teamsForTiers[tier].Add(teamObject);
}
}
currentTeams.Clear();
postLikes = 0;
@@ -535,9 +588,10 @@ private static void HandleLine(string url, string tier, Dictionary<string, List<
blockText += temp + "\n";
}
}
else if (blockStarted && line.Contains("</div></div>"))
else if (blockStarted && ((line.Trim().Replace("\t", "").Contains("</div>") && lastLine.Trim().Replace("\t", "").Contains("</div>")) || line.Contains("</div></div>")))
{
blockStarted = false;
blockText = blockText.Replace("\t", "");
blockText = blockText.Replace("<br />", "");
blockText = blockText.Replace("</div>", "");
if (IsTeam(blockText))
@@ -572,14 +626,100 @@ private static void HandleLine(string url, string tier, Dictionary<string, List<
pasteUrl = pasteUrl.Substring(0, nearest);
}
pasteUrl = "http://" + pasteUrl;
currentTeams.Add(GetTeamFromPasteURL(pasteUrl));
currentTeams.Add(GetTeamFromPokepasteURL(pasteUrl));
}
if (line.Contains("pastebin.com/"))
{
string pasteUrl = line.Substring(line.IndexOf("pastebin.com/"));
if (pasteUrl.Contains(" ") || pasteUrl.Contains("\"") || pasteUrl.Contains("<"))
{
int nearest = int.MaxValue;
int space = pasteUrl.IndexOf(" ");
int quotation = pasteUrl.IndexOf("\"");
int arrow = pasteUrl.IndexOf("<");

foreach (int pos in new int[] { space, quotation, arrow })
{
if (pos != -1 && pos < nearest)
{
nearest = pos;
}
}

pasteUrl = pasteUrl.Substring(0, nearest);
}
if (pasteUrl.Contains("/raw/"))
{
pasteUrl = "https://" + pasteUrl;
}
else
{
pasteUrl = "https://pastebin.com/raw/" + pasteUrl.Substring(pasteUrl.IndexOf("/") + 1);
}
string pasteString = GetTeamFromPastebinURL(pasteUrl);
if (IsTeam(pasteString))
{
bool blackListed = false;
foreach(string urlPart in hardCodedBlacklistedPastes)
{
if (pasteUrl.Contains(urlPart))
{
blackListed = true;
}
}
if (!blackListed)
{
currentTeams.Add(pasteString);
}
}
}
if (line.Contains("hastebin.com/"))
{
string pasteUrl = line.Substring(line.IndexOf("hastebin.com/"));
if (pasteUrl.Contains(" ") || pasteUrl.Contains("\"") || pasteUrl.Contains("<"))
{
int nearest = int.MaxValue;
int space = pasteUrl.IndexOf(" ");
int quotation = pasteUrl.IndexOf("\"");
int arrow = pasteUrl.IndexOf("<");

foreach (int pos in new int[] { space, quotation, arrow })
{
if (pos != -1 && pos < nearest)
{
nearest = pos;
}
}

pasteUrl = pasteUrl.Substring(0, nearest);
}
if (pasteUrl.Contains("/raw/"))
{
pasteUrl = "https://" + pasteUrl;
}
else
{
pasteUrl = "https://hastebin.com/raw/" + pasteUrl.Substring(pasteUrl.IndexOf("/") + 1);
}
string pasteString = GetTeamFromPastebinURL(pasteUrl);
if (IsTeam(pasteString))
{
currentTeams.Add(pasteString);
}
}
}
lastLine = line;
}

private static string GetTeamFromPasteURL(string pasteUrl)
private static string GetTeamFromPokepasteURL(string pasteUrl)
{
string site = client.DownloadString(pasteUrl);
string site = "";
try
{
site = client.DownloadString(pasteUrl);
}
catch (WebException)
{ }

string team = "";

@@ -606,9 +746,35 @@ private static string GetTeamFromPasteURL(string pasteUrl)
return team;
}

/// <summary>
/// Downloads the text behind a raw paste URL and removes anything that looks like a markup tag.
/// HandleLine uses this for both pastebin.com and hastebin.com links.
/// </summary>
/// <param name="pasteUrl">Absolute URL of the raw paste to download.</param>
/// <returns>
/// The downloaded text with every angle-bracket tag removed, or an empty string when the
/// download fails with a <see cref="WebException"/>.
/// </returns>
private static string GetTeamFromPastebinURL(string pasteUrl)
{
    string site = "";
    try
    {
        site = client.DownloadString(pasteUrl);
    }
    catch (WebException e)
    {
        // Best effort: a dead or private paste must not abort the crawl, but the failure
        // should be visible in the log (same message format AnalyzeTopic uses) instead of vanishing.
        Console.WriteLine("WebException bei: " + pasteUrl);
        Console.WriteLine(e.Message);
    }

    // Lazy match strips each shortest "<...>" run; '.' does not cross newlines by default,
    // so only tags that open and close on the same line are removed.
    return Regex.Replace(site, "<.*?>", String.Empty);
}

private static Regex doubleRowRegex = new Regex(@"\n\n");

/// <summary>
/// Heuristically decides whether a block of text is a full six-member team export.
/// </summary>
/// <param name="blockText">Candidate text taken from a post block or a downloaded paste.</param>
/// <returns>
/// <c>true</c> when at least six blank-line-separated sections each contain four or more
/// move lines (a newline followed by "- "), or when the whole text mentions "EVs: ",
/// "Nature" and "Ability: " at least six times each; otherwise <c>false</c>.
/// </returns>
private static bool IsTeam(string blockText)
{
    // Drop carriage returns first so the "\n\n" section separator also matches CRLF pastes.
    string[] split = doubleRowRegex.Split(blockText.Replace("\r", ""));

    // Count sections that carry a full moveset; this recognises teams whose sets omit
    // EVs/Nature/Ability lines and would fail the keyword check below.
    // NOTE(review): CountOccurences is defined outside this block — presumably it counts
    // occurrences of the needle in the haystack; confirm.
    int countFullMoves = 0;
    foreach (string mon in split)
    {
        if (CountOccurences(mon, "\n- ") >= 4)
        {
            countFullMoves++;
        }
    }

    // The previous keyword-only check is kept as a fallback rather than as an early return,
    // so the move-count heuristic above is actually reachable.
    return countFullMoves >= 6 || (CountOccurences(blockText, "EVs: ") >= 6 && CountOccurences(blockText, "Nature") >= 6 && CountOccurences(blockText, "Ability: ") >= 6);
}

private static int CountOccurences(string haystack, string needle)
@@ -13,6 +13,8 @@ public class Team
public DateTime PostDate;
public string URL;
public string PostedBy;
public string TeamTitle = null;
public string TeamTier = null;

public double Koeffizient;
private static double koeffScale = 5;
@@ -29,7 +29,7 @@ static void Main(string[] args)
string tierDef = tier.Key;
foreach(Team team in tier.Value)
{
string[] lines = team.TeamString.Replace("\t", "").Split('\n');
string[] lines = team.TeamString.Replace("\t", "").Replace("\r", "").Split('\n');
bool skipTeam = false;
foreach(string line in lines)
{
@@ -65,9 +65,19 @@ static void Main(string[] args)
showdownTier = "[" + showdownTier + "] ";
}

importable += showdownTier;
if (team.TeamTier != null)
{
importable += "[" + team.TeamTier + "]";
}
else
{
importable += showdownTier;
}
importable += tierDef + "/";
importable += "Smogon Nr. " + smogonTeamCount + " " + team.Likes + " Likes " + ((int)(team.Koeffizient)) + " Score posted by " + team.PostedBy;

string teamString = "Smogon Nr. " + smogonTeamCount + " " + team.Likes + " Likes " + ((int)(team.Koeffizient)) + " Score posted by " + team.PostedBy + ((team.TeamTitle != null) ? (" " + team.TeamTitle) : "");
importable += teamString;
Console.WriteLine(teamString);

importable += " ===\n\n";

@@ -86,7 +96,7 @@ static void Main(string[] args)
string tierDef = tier.Key;
foreach (Team team in tier.Value)
{
string[] lines = team.TeamString.Replace("\t", "").Split('\n');
string[] lines = team.TeamString.Replace("\t", "").Replace("\r", "").Split('\n');
bool skipTeam = false;
foreach (string line in lines)
{
@@ -122,9 +132,19 @@ static void Main(string[] args)
showdownTier = "[" + showdownTier + "] ";
}

importable += showdownTier;
if (team.TeamTier != null)
{
importable += "[" + team.TeamTier + "]";
}
else
{
importable += showdownTier;
}
importable += tierDef + "/";
importable += "RMT Nr. " + rmtTeamCount + " " + team.Likes + " Likes " + ((int)(team.Koeffizient)) + " Score posted by " + team.PostedBy;

string teamString = "RMT Nr. " + rmtTeamCount + " " + team.Likes + " Likes " + ((int)(team.Koeffizient)) + " Score posted by " + team.PostedBy + ((team.TeamTitle != null) ? (" " + team.TeamTitle) : "");
importable += teamString;
Console.WriteLine(teamString);

importable += " ===\n\n";

@@ -215,6 +235,7 @@ private static string TranslateSmogonTeamsTier(string tier, string url)
{ "adv.", "gen3" },
{ "gsc", "gen2" },
{ "rby", "gen1" },
{ "stadium", "gen1" },
};

private static Dictionary<string, string> mappingWithSpace = new Dictionary<string, string>
@@ -229,6 +250,7 @@ private static string TranslateSmogonTeamsTier(string tier, string url)
{ "adv. ", "gen3" },
{ "gsc ", "gen2" },
{ "rby ", "gen1" },
{ "stadium ", "gen1" },
};

private static List<string> listOfTiers = new List<string>

0 comments on commit 56fd792

Please sign in to comment.