@@ -601,6 +601,112 @@ Namespace WikiBot
601601 End Function
602602
603603
''' <summary>
''' Builds a list of extract texts keyed by the page names that were requested.
''' Each name is resolved with Getpage and, when the page exists, its extract is
''' produced by GetWikiExtractFromPage.
''' </summary>
''' <param name="pages">Names of the pages to process. Nothing entries and repeated names are skipped.</param>
''' <param name="charLimit">Character limit forwarded unchanged to GetWikiExtractFromPage.</param>
''' <returns>
''' A SortedList mapping each requested name to the ExtractContent of its extract.
''' Names whose page does not exist, or for which no extract is produced, are omitted.
''' </returns>
Function GetWikiExtractFromPageNames(ByVal pages As String(), ByVal charLimit As Integer) As SortedList(Of String, String)
    Dim tpageres As New SortedList(Of String, String)
    If pages Is Nothing Then
        Return tpageres
    End If
    For Each page As String In pages
        ' SortedList.Add throws on a Nothing key and on a key that is already present;
        ' skipping those names keeps one bad entry from aborting the whole batch
        ' (and avoids resolving the same page twice).
        If page Is Nothing OrElse tpageres.ContainsKey(page) Then
            Continue For
        End If
        Dim tpage As Page = Getpage(page)
        If Not tpage.Exists Then
            Continue For
        End If
        Dim textract As WikiExtract = GetWikiExtractFromPage(tpage, charLimit)
        ' GetWikiExtractFromPage returns Nothing when it produces no extract.
        If textract IsNot Nothing Then
            tpageres.Add(page, textract.ExtractContent)
        End If
    Next
    Return tpageres
End Function
615+
''' <summary>
''' Gets the lead-section extract of several pages given by name, keeping the
''' wikitext but removing templates and references.
''' </summary>
''' <param name="pages">Names of the pages to resolve through Getpage.</param>
''' <param name="charLimit">Character limit forwarded to the Page-array overload.</param>
''' <returns>The set of extracts that the Page-array overload produces for the pages that exist.</returns>
Function GetWikiExtractFromPages(ByVal pages As String(), ByVal charLimit As Integer) As HashSet(Of WikiExtract)
    Dim existingPages As New List(Of Page)
    For Each pageName As String In pages
        Dim candidate As Page = Getpage(pageName)
        ' Pages that do not exist are left out of the batch.
        If Not candidate.Exists Then
            Continue For
        End If
        existingPages.Add(candidate)
    Next
    Dim pageArray As Page() = existingPages.ToArray()
    Return GetWikiExtractFromPages(pageArray, charLimit)
End Function
630+
''' <summary>
''' Gets the lead-section extract of several pages, keeping the wikitext but
''' removing templates and references.
''' </summary>
''' <param name="pages">Pages to process, one call to GetWikiExtractFromPage each.</param>
''' <param name="charLimit">Character limit forwarded to GetWikiExtractFromPage.</param>
''' <returns>A set with every extract that was produced; pages yielding Nothing are skipped.</returns>
Function GetWikiExtractFromPages(ByVal pages As Page(), ByVal charLimit As Integer) As HashSet(Of WikiExtract)
    Dim extracts As New HashSet(Of WikiExtract)
    For Each currentPage As Page In pages
        Dim extract As WikiExtract = GetWikiExtractFromPage(currentPage, charLimit)
        If extract Is Nothing Then
            Continue For
        End If
        extracts.Add(extract)
    Next
    Return extracts
End Function
645+
''' <summary>
''' Gets the lead-section extract of a page, keeping the wikitext but removing
''' templates, tables, references, category links and file links.
''' </summary>
''' <param name="page">Page to process.</param>
''' <param name="charLimit">
''' Maximum length of the extract; longer text is cut to charLimit + 1 characters
''' and then handed to SafeTrimExtract together with charLimit.
''' </param>
''' <returns>
''' A WikiExtract holding the cleaned text and the page title, or Nothing when
''' the page is Nothing or does not exist.
''' </returns>
Function GetWikiExtractFromPage(ByVal page As Page, ByVal charLimit As Integer) As WikiExtract
    If page Is Nothing OrElse Not page.Exists Then
        Return Nothing
    End If

    Dim pagethreads As String() = page.Threads
    Dim TreatedExtract As String = page.Content

    ' Strip the text of every entry reported by page.Threads.
    For Each thread As String In pagethreads
        ' String.Replace throws ArgumentException for an empty search string.
        If Not String.IsNullOrEmpty(thread) Then
            TreatedExtract = TreatedExtract.Replace(thread, "")
        End If
    Next

    ' Strip templates, except a few common inline text templates that are kept.
    Dim templates As String() = Template.GetTemplateTextArray(TreatedExtract).ToArray
    For Each temp As String In templates
        If String.IsNullOrEmpty(temp) Then
            Continue For
        End If
        ' Ordinal, case-insensitive comparison: ToUpper plus a culture-sensitive
        ' StartsWith fails under cultures such as tr-TR, where "i" uppercases to "İ".
        Dim keepTemplate As Boolean =
            temp.StartsWith("{{IPA|", StringComparison.OrdinalIgnoreCase) OrElse
            temp.StartsWith("{{NR|", StringComparison.OrdinalIgnoreCase) OrElse
            temp.StartsWith("{{MP|", StringComparison.OrdinalIgnoreCase) OrElse
            temp.StartsWith("{{NIHONGO|", StringComparison.OrdinalIgnoreCase)
        If Not keepTemplate Then
            TreatedExtract = TreatedExtract.Replace(temp, "").Trim()
        End If
    Next

    ' Tables: from a line starting with "{|" up to the next line starting with "|}".
    TreatedExtract = Regex.Replace(TreatedExtract, "(\n\{\|)([\s\S]+?)(\n\|\})", "")
    ' Self-closing references (<ref name="x" />) go first so that the paired
    ' pattern below never mistakes one for an opening tag.
    TreatedExtract = Regex.Replace(TreatedExtract, "<[Rr]ef[^<>]*?/>", "")
    ' Paired references. The attribute part may not cross a tag boundary: the
    ' previous greedy ".+" could run from the first <ref ...> on a line to a later
    ' </ref>, deleting the prose between two references.
    TreatedExtract = Regex.Replace(TreatedExtract, "<[Rr]ef(\s[^<>]*)?>[\s\S]*?</[Rr]ef>", "")
    ' Category links.
    TreatedExtract = Regex.Replace(TreatedExtract, "(\[\[[Cc]ategoría:)(.+?)(\]\])", "")
    ' Literal note markers such as "[nota 3]".
    TreatedExtract = Regex.Replace(TreatedExtract, "\[nota\ [0-9]+\]", "")
    TreatedExtract = Utils.RemoveExcessOfSpaces(TreatedExtract)
    TreatedExtract = Removefiles(TreatedExtract)
    TreatedExtract = TreatedExtract.Trim()

    If TreatedExtract.Length > charLimit Then
        TreatedExtract = SafeTrimExtract(TreatedExtract.Substring(0, charLimit + 1), charLimit)
    End If

    ' NOTE(review): a previous comment here said the first occurrence of the page
    ' title in the extract is put in bold; no code in this function does that.
    Dim Extract As New WikiExtract With {
        .ExtractContent = TreatedExtract,
        .PageName = page.Title}
    Return Extract
End Function
688+
689+
''' <summary>
''' Removes file links ([[Archivo:...]] and [[File:...]]) from the given wikitext,
''' including links whose text contains nested [[...]] links.
''' </summary>
''' <param name="str">Wikitext to clean.</param>
''' <returns>The text without file links; a Nothing or empty input is returned unchanged.</returns>
Private Function Removefiles(ByVal str As String) As String
    If String.IsNullOrEmpty(str) Then
        Return str
    End If

    ' Both prefixes require the namespace colon, so an ordinary link whose title
    ' merely starts with "File" (e.g. [[Filete]]) is not treated as a file link.
    Const filePattern As String = "\[\[([Aa]rchivo:|[Ff]ile:).+?\]\]"

    Dim tstr As String = str
    Do
        Dim match As Match = Regex.Match(tstr, filePattern)
        ' Stop as soon as nothing matches. This also covers an unclosed file link
        ' left behind after un-nesting, which previously ended in
        ' String.Replace("", "") and an ArgumentException.
        If Not match.Success Then
            Exit Do
        End If

        Dim candidate As String = match.Value
        If Utils.CountOccurrences(candidate, "[[") = Utils.CountOccurrences(candidate, "]]") Then
            ' Balanced brackets: the match is a complete file link, drop it.
            tstr = tstr.Replace(candidate, "")
        Else
            ' The lazy match ended at the "]]" of a nested link. Presumably ReplaceLast
            ' removes the last occurrence of each marker (verify against Utils), which
            ' unwraps that nested link so the next pass can match further.
            Dim fixedmatch As String = Utils.ReplaceLast(Utils.ReplaceLast(candidate, "[[", ""), "]]", "")
            tstr = tstr.Replace(candidate, fixedmatch)
        End If
    Loop
    Return tstr
End Function
709+
604710 ''' <summary>
605711 ''' Retorna los resúmenes de las páginas indicadas en el array de entrada como SortedList (con el formato {Página,Resumen}), los nombres de página deben ser distintos.
606712 ''' En caso de no existir el la página o el resumen, no lo retorna.
0 commit comments