Skip to content
Permalink
Browse files

Initial commit

  • Loading branch information...
AdrienGuille committed Nov 20, 2014
1 parent 09f9dd8 commit fa5d32642e47f68183256ebac9cb79195f97bfae
Showing with 17,653 additions and 0 deletions.
  1. +22 −0 README.md
  2. +1 −0 autostart.bat
  3. +1 −0 autostart.sh
  4. BIN lib/HAC.jar
  5. BIN lib/commons-io-2.4.jar
  6. BIN lib/commons-math3-3.2.jar
  7. BIN lib/extcos-0.4b-all.jar
  8. BIN lib/gs-algo-1.2.jar
  9. BIN lib/gs-core-1.1.1.jar
  10. BIN lib/gs-core-1.2.jar
  11. BIN lib/gs-ui-1.2.jar
  12. BIN lib/jWave_java_groovy.jar
  13. BIN lib/jmod-1.2b.jar
  14. BIN lib/jtransforms-2.4.jar
  15. BIN lib/lucene-analyzers-3.6.2.jar
  16. BIN lib/lucene-core-3.6.2.jar
  17. BIN lib/lucene-smartcn-3.6.1.jar
  18. BIN lib/mysql-connector-java-5.1.27-bin.jar
  19. BIN lib/stanford-corenlp-1.3.4.jar
  20. BIN lib/xom.jar
  21. +1 −0 sample_messages_file.csv
  22. +3 −0 sample_network_file.csv
  23. +5 −0 sondy-config.properties
  24. +54 −0 src/cc/mallet/util/Util.java
  25. +70 −0 src/fr/ericlab/sondy/algo/AlgorithmParameter.java
  26. +190 −0 src/fr/ericlab/sondy/algo/eventdetection/Discrepancy.java
  27. +206 −0 src/fr/ericlab/sondy/algo/eventdetection/EDCoW.java
  28. +309 −0 src/fr/ericlab/sondy/algo/eventdetection/ET.java
  29. +94 −0 src/fr/ericlab/sondy/algo/eventdetection/EventDetectionAlgorithm.java
  30. +320 −0 src/fr/ericlab/sondy/algo/eventdetection/MABED.java
  31. +188 −0 src/fr/ericlab/sondy/algo/eventdetection/MACD.java
  32. +218 −0 src/fr/ericlab/sondy/algo/eventdetection/OnlineLDA.java
  33. +128 −0 src/fr/ericlab/sondy/algo/eventdetection/PeakyTopics.java
  34. +131 −0 src/fr/ericlab/sondy/algo/eventdetection/PersistentConversations.java
  35. +139 −0 src/fr/ericlab/sondy/algo/eventdetection/TrendingScore.java
  36. +42 −0 src/fr/ericlab/sondy/algo/eventdetection/edcow/CrossCorrelationZeroTime.java
  37. +140 −0 src/fr/ericlab/sondy/algo/eventdetection/edcow/Dwt.java
  38. +94 −0 src/fr/ericlab/sondy/algo/eventdetection/edcow/EDCoWEvent.java
  39. +116 −0 src/fr/ericlab/sondy/algo/eventdetection/edcow/EDCoWKeyword.java
  40. +168 −0 src/fr/ericlab/sondy/algo/eventdetection/edcow/ModularityDetection.java
  41. +88 −0 src/fr/ericlab/sondy/algo/eventdetection/edcow/SignalConstruction.java
  42. +61 −0 src/fr/ericlab/sondy/algo/eventdetection/edcow/Threshold.java
  43. +64 −0 src/fr/ericlab/sondy/algo/eventdetection/edcow/Vector.java
  44. +39 −0 src/fr/ericlab/sondy/algo/eventdetection/et/Burst.java
  45. +69 −0 src/fr/ericlab/sondy/algo/eventdetection/et/Bursts.java
  46. +56 −0 src/fr/ericlab/sondy/algo/eventdetection/et/ETEvent.java
  47. +53 −0 src/fr/ericlab/sondy/algo/eventdetection/et/ETExperiment.java
  48. +39 −0 src/fr/ericlab/sondy/algo/eventdetection/et/Frequency.java
  49. +101 −0 src/fr/ericlab/sondy/algo/eventdetection/et/OverallDissimilarity.java
  50. +83 −0 src/fr/ericlab/sondy/algo/eventdetection/mabed/MABEDTimeInterval.java
  51. +171 −0 src/fr/ericlab/sondy/algo/eventdetection/mabed/MABEDTopic.java
  52. +285 −0 src/fr/ericlab/sondy/algo/eventdetection/mabed/MABEDTopicGraph.java
  53. +56 −0 src/fr/ericlab/sondy/algo/eventdetection/mabed/MABEDTopicList.java
  54. +53 −0 src/fr/ericlab/sondy/algo/eventdetection/mabed/MABEDWeightedTerm.java
  55. +69 −0 src/fr/ericlab/sondy/algo/eventdetection/mabed/MABEDWeightedTermList.java
  56. +51 −0 src/fr/ericlab/sondy/algo/eventdetection/onlinelda/OnlineLDATopic.java
  57. +40 −0 src/fr/ericlab/sondy/algo/eventdetection/onlinelda/OnlineLDATopicList.java
  58. +74 −0 src/fr/ericlab/sondy/algo/networkanalysis/BetweennessCentrality.java
  59. +132 −0 src/fr/ericlab/sondy/algo/networkanalysis/KShellDecomposition.java
  60. +133 −0 src/fr/ericlab/sondy/algo/networkanalysis/LogKShellDecomposition.java
  61. +79 −0 src/fr/ericlab/sondy/algo/networkanalysis/NetworkAnalysisAlgorithm.java
  62. +80 −0 src/fr/ericlab/sondy/algo/networkanalysis/PageRank.java
  63. +82 −0 src/fr/ericlab/sondy/algo/networkanalysis/ReplayDiffusion.java
  64. +89 −0 src/fr/ericlab/sondy/algo/networkanalysis/SocialCapitalists.java
  65. +33 −0 src/fr/ericlab/sondy/algo/timeseries/CorrelationCoefficient.java
  66. +49 −0 src/fr/ericlab/sondy/algo/timeseries/ErdemCoefficient.java
  67. +59 −0 src/fr/ericlab/sondy/algo/timeseries/PearsonCoefficient.java
  68. +92 −0 src/fr/ericlab/sondy/app/Main.java
  69. +333 −0 src/fr/ericlab/sondy/core/AppVariables.java
  70. +619 −0 src/fr/ericlab/sondy/core/DataManipulation.java
  71. +325 −0 src/fr/ericlab/sondy/core/access/DBAccess.java
  72. +183 −0 src/fr/ericlab/sondy/core/access/IndexAccess.java
  73. +169 −0 src/fr/ericlab/sondy/core/access/MentionIndexAccess.java
  74. +36 −0 src/fr/ericlab/sondy/core/misc/DefaultStopWords.java
  75. +60 −0 src/fr/ericlab/sondy/core/misc/StopWords.java
  76. +96 −0 src/fr/ericlab/sondy/core/structure/Collection.java
  77. +166 −0 src/fr/ericlab/sondy/core/structure/Configuration.java
  78. +76 −0 src/fr/ericlab/sondy/core/structure/DetectionResult.java
  79. +60 −0 src/fr/ericlab/sondy/core/structure/LogEntry.java
  80. +71 −0 src/fr/ericlab/sondy/core/structure/Message.java
  81. +29 −0 src/fr/ericlab/sondy/core/structure/MessageSet.java
  82. +70 −0 src/fr/ericlab/sondy/core/structure/Point2D.java
  83. +39 −0 src/fr/ericlab/sondy/core/structure/SimpleTopic.java
  84. +43 −0 src/fr/ericlab/sondy/core/structure/TemporalPattern.java
  85. +51 −0 src/fr/ericlab/sondy/core/structure/TermInfo.java
  86. +73 −0 src/fr/ericlab/sondy/core/structure/Timeline.java
  87. +46 −0 src/fr/ericlab/sondy/core/structure/TimelineEvent.java
  88. +31 −0 src/fr/ericlab/sondy/core/structure/Topic.java
  89. +83 −0 src/fr/ericlab/sondy/core/structure/list/DetectionResultList.java
  90. +85 −0 src/fr/ericlab/sondy/core/structure/list/Point2DList.java
  91. +92 −0 src/fr/ericlab/sondy/core/structure/list/TermInfoList.java
  92. +663 −0 src/fr/ericlab/sondy/ui/DataUI.java
  93. +1,096 −0 src/fr/ericlab/sondy/ui/EventsUI.java
  94. +521 −0 src/fr/ericlab/sondy/ui/GlobalUI.java
  95. +611 −0 src/fr/ericlab/sondy/ui/NetworkUI.java
  96. +53 −0 src/fr/ericlab/sondy/ui/misc/ContextMenuListCell.java
  97. +65 −0 src/fr/ericlab/sondy/ui/misc/ContextMenuTableCell.java
  98. +101 −0 src/fr/ericlab/sondy/ui/misc/Credits.java
  99. +50 −0 src/fr/ericlab/sondy/ui/misc/DefaultListCell.java
  100. +56 −0 src/fr/ericlab/sondy/ui/misc/DefaultViewListener.java
  101. +108 −0 src/fr/ericlab/sondy/ui/misc/EditingCell.java
  102. +98 −0 src/fr/ericlab/sondy/utils/Utils.java
  103. +56 −0 src/javafx/embed/swing/BufferedImageView.java
  104. +151 −0 src/javafx/embed/swing/FXSwingKeyboardFocusManagerPeer.java
  105. +142 −0 src/javafx/embed/swing/KFMHelper.java
  106. +789 −0 src/javafx/embed/swing/KeyEventHandler.java
  107. +550 −0 src/javafx/embed/swing/ProxyGraphics.java
  108. +97 −0 src/javafx/embed/swing/ProxyWindow.java
  109. +507 −0 src/javafx/embed/swing/ProxyWindowPeer.java
  110. +124 −0 src/javafx/embed/swing/SwingEventDispatcherHelper.java
  111. +81 −0 src/javafx/embed/swing/SwingFX.java
  112. +291 −0 src/javafx/embed/swing/SwingView.java
  113. +732 −0 src/javafx/widget/DoubleSlider.java
  114. +264 −0 src/javafx/widget/DoubleSliderBehavior.java
  115. +395 −0 src/javafx/widget/DoubleSliderSkin.java
  116. +45 −0 src/resources/algo/README-online_lda.txt
  117. +342 −0 src/resources/algo/lda.py
  118. +27 −0 src/resources/algo/run_lda.sh
  119. +458 −0 src/resources/algo/stopwords.txt
  120. +204 −0 src/resources/algo/vocabulary.py
  121. +52 −0 src/resources/css/Chart.css
  122. +55 −0 src/resources/css/Style.css
  123. +15 −0 src/resources/css/Timeline.css
  124. +102 −0 src/resources/css/double_slider.css
  125. BIN src/resources/fonts/DroidSans.ttf
  126. BIN src/resources/fonts/OpenSans-Bold.ttf
  127. BIN src/resources/fonts/Raleway-Thin.otf
  128. BIN src/resources/images/app-logo-base.png
  129. BIN src/resources/images/mac_network.png
  130. BIN src/resources/images/nok.png
  131. BIN src/resources/images/ok.png
  132. +494 −0 src/resources/stopwords/common(cn)
  133. +478 −0 src/resources/stopwords/common(en)
  134. +24 −0 src/resources/stopwords/twitter(en)
  135. 0 src/resources/stopwords/twitter(fr)
@@ -0,0 +1,22 @@
SONDY
=====

Open-source social media data mining software (event detection and influence analysis)

Author: Adrien GUILLE

Details of this program are described in the following paper:

Adrien Guille, Cécile Favre, Hakim Hacid, Djamel A. Zighed (2013)
SONDY: an open source platform for social dynamics mining and analysis.
In proceedings of the ACM International Conference on Management of Data (SIGMOD 2013),
pp. 1005-1008, DOI: 10.1145/2463676.2463694

Please cite this paper when using the application.

------------------------
Stanford CoreNLP library
------------------------

To run SONDY, download the Stanford CoreNLP library from http://nlp.stanford.edu/software/corenlp.shtml#Download
and copy stanford-corenlp-1.3.x-models.jar into the lib/ directory of SONDY.
@@ -0,0 +1 @@
java -Xms1G -Xmx4G -jar SONDY.jar
@@ -0,0 +1 @@
java -Xms1G -Xmx4G -jar SONDY.jar
BIN +26 KB lib/HAC.jar
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN +311 KB lib/gs-algo-1.2.jar
Binary file not shown.
BIN +718 KB lib/gs-core-1.1.1.jar
Binary file not shown.
BIN +866 KB lib/gs-core-1.2.jar
Binary file not shown.
BIN +7.13 MB lib/gs-ui-1.2.jar
Binary file not shown.
Binary file not shown.
BIN +18.5 MB lib/jmod-1.2b.jar
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN +306 KB lib/xom.jar
Binary file not shown.
@@ -0,0 +1 @@
123456 2013-06-19 18:28:17 content of the message
@@ -0,0 +1,3 @@
123456 98765
123456 45670
102938 73529
@@ -0,0 +1,5 @@
workspace=
password=
host=
schema=
username=
@@ -0,0 +1,54 @@
/* Copyright (C) 2003 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */

package cc.mallet.util;

/**
 * Static helpers that compare two discrete distributions given as
 * {@code double} arrays.
 *
 * @author <a href="mailto:casutton@cs.umass.edu">Charles Sutton</a>
 * @version $Id: ArrayUtils.java,v 1.1 2007/10/22 21:37:40 mccallum Exp $
 */
public class Util {

    /** Natural logarithm of 2; dividing a natural log by it yields a base-2 log. */
    public static final double log2 = Math.log(2);

    /**
     * Returns the Jensen-Shannon divergence of two distributions: the mean of
     * the KL divergences of each input against their element-wise average.
     *
     * @param p1 first distribution
     * @param p2 second distribution; expected to have the same length as
     *           {@code p1} (only checked when assertions are enabled)
     * @return the Jensen-Shannon divergence, in bits
     */
    public static double jensenShannonDivergence(double[] p1, double[] p2) {
        assert(p1.length == p2.length);
        final int size = p1.length;
        double[] midpoint = new double[size];
        for (int k = 0; k < size; k++) {
            midpoint[k] = (p1[k] + p2[k]) / 2;
        }
        double fromFirst = klDivergence(p1, midpoint);
        double fromSecond = klDivergence(p2, midpoint);
        return (fromFirst + fromSecond) / 2;
    }

    /**
     * Returns the KL divergence, K(p1 || p2), using base-2 logarithms.
     *
     * <p>Positions where either {@code p1} or {@code p2} is zero contribute
     * nothing to the result. In particular a zero in {@code p2} does not make
     * the divergence infinite, as the textbook definition would; that term is
     * simply skipped.
     *
     * @param p1 the reference distribution; its length drives the iteration
     * @param p2 the distribution compared against {@code p1}
     * @return the KL divergence, in bits
     */
    public static double klDivergence(double[] p1, double[] p2) {
        double total = 0.0;
        for (int k = 0; k < p1.length; k++) {
            // p2[k] is only read when p1[k] is non-zero, as in a guard-clause form.
            if (p1[k] != 0 && p2[k] != 0.0) {
                total += p1[k] * Math.log(p1[k] / p2[k]);
            }
        }
        // Convert once from natural log to base 2 instead of per term.
        return total / log2;
    }
}
@@ -0,0 +1,70 @@
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/

package fr.ericlab.sondy.algo;

import javafx.beans.property.SimpleStringProperty;

////////////////////////////////////////////////////////////////////////////////
// This file is part of SONDY. //
// //
// SONDY is free software: you can redistribute it and/or modify //
// it under the terms of the GNU General Public License as published by //
// the Free Software Foundation, either version 3 of the License, or //
// (at your option) any later version. //
// //
// SONDY is distributed in the hope that it will be useful, //
// but WITHOUT ANY WARRANTY; without even the implied warranty of //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
// GNU General Public License for more details. //
// //
// You should have received a copy of the GNU General Public License //
// along with SONDY. If not, see <http://www.gnu.org/licenses/>. //
////////////////////////////////////////////////////////////////////////////////

/**
 * A generic algorithm parameter: a name paired with a value, both kept as
 * strings inside JavaFX string properties.
 *
 * @author Adrien GUILLE, Laboratoire ERIC, Université Lumière Lyon 2
 */

public class AlgorithmParameter {
    private final SimpleStringProperty parameter;
    private final SimpleStringProperty value;

    /**
     * Creates a parameter with the given name and initial value.
     *
     * @param p the parameter name
     * @param v the initial value, as text
     */
    public AlgorithmParameter(String p, String v){
        this.parameter = new SimpleStringProperty(p);
        this.value = new SimpleStringProperty(v);
    }

    /**
     * Returns the name of this parameter.
     *
     * @return the parameter name
     */
    public String getParameter() {
        return this.parameter.get();
    }

    /**
     * Returns the current value of this parameter.
     *
     * @return the value, as text
     */
    public String getValue() {
        return this.value.get();
    }

    /**
     * Replaces the current value of this parameter.
     *
     * @param newValue the new value, as text
     */
    public void setValue(String newValue){
        this.value.set(newValue);
    }
}
@@ -0,0 +1,190 @@
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package fr.ericlab.sondy.algo.eventdetection;

import fr.ericlab.sondy.algo.AlgorithmParameter;
import fr.ericlab.sondy.core.DataManipulation;
import fr.ericlab.sondy.core.access.IndexAccess;
import fr.ericlab.sondy.core.structure.Collection;
import fr.ericlab.sondy.core.structure.DetectionResult;
import fr.ericlab.sondy.core.structure.Point2D;
import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map.Entry;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import javafx.collections.FXCollections;
import javafx.collections.ObservableList;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;

////////////////////////////////////////////////////////////////////////////////
// This file is part of SONDY. //
// //
// SONDY is free software: you can redistribute it and/or modify //
// it under the terms of the GNU General Public License as published by //
// the Free Software Foundation, either version 3 of the License, or //
// (at your option) any later version. //
// //
// SONDY is distributed in the hope that it will be useful, //
// but WITHOUT ANY WARRANTY; without even the implied warranty of //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
// GNU General Public License for more details. //
// //
// You should have received a copy of the GNU General Public License //
// along with SONDY. If not, see <http://www.gnu.org/licenses/>. //
////////////////////////////////////////////////////////////////////////////////

/**
 * Event detection algorithm that scores each indexed term with a
 * discrepancy-based measure of burstiness and reports, for every retained
 * term, its highest-scoring time interval.
 *
 * @author Adrien GUILLE, Laboratoire ERIC, Université Lumière Lyon 2
 */

public class Discrepancy extends EventDetectionAlgorithm {

    double minTermSupport = 0;
    double maxTermSupport = 1.0;
    int smooth = 0;

    /**
     * Returns the display name of this algorithm.
     *
     * @return the algorithm name
     */
    public String getName(){
        return "Discrepancy Model";
    }

    /**
     * Runs the detection over every term of the index.
     *
     * <p>For each term that is longer than one character, is not a stop word
     * and whose total count lies strictly between the support bounds, a
     * per-slice burstiness sequence is computed, candidate intervals are
     * built, and the interval with the largest summed score is stored as a
     * {@link DetectionResult}.
     *
     * @return the list of results (also stored in {@code results}), or
     *         {@code null} if an {@link IOException} occurred while reading
     *         the index
     */
    public ObservableList<DetectionResult> apply() {
        try {
            readParameters();
            long startNanoTime = System.nanoTime();
            DataManipulation dataManipulation = new DataManipulation();
            HashMap<DetectionResult,Float> scores = new HashMap<>();
            IndexAccess indexAccess = new IndexAccess(appVariables);
            try {
                IndexReader r = indexAccess.reader;
                TermEnum allTerms = r.terms();
                try {
                    int m = r.numDocs();
                    float intervalDuration = ((float) appVariables.getCurrentDatasetInterval())/60;
                    int minTermOccur = (int)(minTermSupport * appVariables.nbMessages);
                    int maxTermOccur = (int)(maxTermSupport * appVariables.nbMessages);
                    while(allTerms.next()){
                        String term = allTerms.term().text();
                        if(term.length()>1 && !appVariables.isStopWord(term)){
                            // NOTE(review): termDocs is not closed here because it is unknown
                            // whether IndexAccess.getTermFrequency already closes it - confirm.
                            TermDocs termDocs = r.termDocs(allTerms.term());
                            float frequency[] = indexAccess.getTermFrequency(appVariables, termDocs);
                            // Slot m (one past the last slice) is read as the term's overall count;
                            // presumably filled in by getTermFrequency - confirm against IndexAccess.
                            float cf = frequency[m];
                            if(cf>minTermOccur && cf<maxTermOccur){
                                if(smooth>0){
                                    frequency = dataManipulation.getSmoothedTermFrequency(frequency, smooth);
                                }
                                float scoreSequence[] = new float[m];
                                for(int i=0; i<m; i++){
                                    scoreSequence[i] = burstiness(m,cf,frequency[i]);
                                }
                                LinkedList<Point2D> intervals = buildCandidateIntervals(scoreSequence);
                                if(!intervals.isEmpty()){
                                    // Pick the first interval with the strictly largest summed score.
                                    // The winner is tracked by reference and its score cached, so no
                                    // element of the candidate list is modified while it is scanned.
                                    Point2D best = intervals.get(0);
                                    float bestScore = sum(scoreSequence,best.x,best.y);
                                    for(Point2D candidate : intervals){
                                        float candidateScore = sum(scoreSequence,candidate.x,candidate.y);
                                        if(candidateScore>bestScore){
                                            best = candidate;
                                            bestScore = candidateScore;
                                        }
                                    }
                                    float startDay = (best.x*intervalDuration)/24;
                                    float endDay = (best.y*intervalDuration)/24;
                                    scores.put(new DetectionResult(term,formatter.format(startDay)+";"+formatter.format(endDay)),bestScore);
                                }
                            }
                        }
                    }
                } finally {
                    // Release the term enumeration even when the scan fails.
                    allTerms.close();
                }
            } finally {
                // Always release the index, including on the IOException path.
                indexAccess.close();
            }
            scores = Collection.getSortedMapDesc(scores);
            Set<Entry<DetectionResult, Float>> entrySet = scores.entrySet();
            results = FXCollections.observableArrayList();
            for (Entry<DetectionResult, Float> entry : entrySet) {
                // Each entry is inserted at the head, so the final list is in
                // the reverse of the map's iteration order.
                results.add(0,entry.getKey());
            }
            long endNanoTime = System.nanoTime();
            long elapsedNanoTime = endNanoTime - startNanoTime;
            double elapsedSeconds = (double)elapsedNanoTime/(double)1000000000;
            appVariables.addLogEntry("[event detection] computed discrepancy model of burstiness, minTermSupport="+minTermSupport+", maxTermSupport="+maxTermSupport+". "+results.size()+" results in "+formatter.format(elapsedSeconds)+"s");
            return results;
        } catch (IOException ex) {
            Logger.getLogger(Discrepancy.class.getName()).log(Level.SEVERE, null, ex);
            return null;
        }
    }

    /**
     * Copies the three user-supplied parameters into the fields; a parameter
     * whose value is null or empty leaves the corresponding field unchanged.
     */
    private void readParameters(){
        String minValue = parameters.get(0).getValue();
        if(minValue!=null && !minValue.equals("")){
            minTermSupport = Double.parseDouble(minValue);
        }
        String maxValue = parameters.get(1).getValue();
        if(maxValue!=null && !maxValue.equals("")){
            maxTermSupport = Double.parseDouble(maxValue);
        }
        String smoothValue = parameters.get(2).getValue();
        if(smoothValue!=null && !smoothValue.equals("")){
            smooth = Integer.parseInt(smoothValue);
        }
    }

    /**
     * Builds the candidate intervals of a score sequence between the
     * currently selected start and end time slices.
     *
     * <p>Every slice with a positive score opens a one-slice interval. That
     * interval is merged with earlier ones when an earlier interval has a
     * smaller left cumulative sum and a smaller right cumulative sum.
     *
     * @param scoreSequence per-slice burstiness scores
     * @return the candidate intervals, in the order they were built
     */
    private LinkedList<Point2D> buildCandidateIntervals(float scoreSequence[]){
        LinkedList<Point2D> I = new LinkedList<>();
        // L and R hold, for each interval in I, the cumulative score just
        // before its start and up to (and including) its end.
        LinkedList<Float> L = new LinkedList<>();
        LinkedList<Float> R = new LinkedList<>();
        for(int i=appVariables.startTimeSlice; i<=appVariables.endTimeSlice; i++){
            if(scoreSequence[i]>0){
                int k = I.size();
                float Lk = 0, Rk = sum(scoreSequence,0,i);
                if(i>0){
                    Lk = sum(scoreSequence,0,i-1);
                }
                // Look for the most recent interval whose left sum is below Lk.
                int j = 0;
                boolean foundJ = false;
                for(int l=k-1; l>=0 && !foundJ; l--){
                    if(L.get(l)<Lk){
                        foundJ = true;
                        j = l;
                    }
                }
                if(foundJ && R.get(j)<Rk){
                    // Merge intervals j..k-1 with the new slice into one interval.
                    Point2D Ik = new Point2D(I.get(j).x,i);
                    for(int p = j; p<k; p++){
                        I.removeLast();
                        L.removeLast();
                        R.removeLast();
                    }
                    I.add(Ik);
                    L.add(sum(scoreSequence,0,Ik.x-1));
                    R.add(sum(scoreSequence,0,Ik.y));
                }else{
                    I.add(new Point2D(i,i));
                    L.add(Lk);
                    R.add(Rk);
                }
            }
        }
        return I;
    }

    /**
     * Sums the elements of an array over an inclusive index range.
     *
     * @param tab the values
     * @param a   first index (inclusive)
     * @param b   last index (inclusive); if {@code b < a} the result is 0
     * @return the sum of {@code tab[a..b]}
     */
    public float sum(float tab[], int a, int b){
        float sum = 0;
        for(int i = a; i<=b; i++){
            sum += tab[i];
        }
        return sum;
    }

    /**
     * Burstiness of a term over an interval: the absolute difference between
     * the share of slices covered by the interval and the share of the term's
     * occurrences that fall inside it.
     *
     * @param m         total number of slices
     * @param cf        total count of the term
     * @param frequency per-slice counts of the term
     * @param I         the interval, with inclusive bounds {@code x} and {@code y}
     * @return the absolute discrepancy
     */
    public float burstiness(float m, float cf, int[] frequency, Point2D I){
        float freqI = 0;
        for(int i = I.x; i<=I.y; i++){
            freqI += frequency[i];
        }
        return Math.abs((I.y-I.x+1)/m-freqI/cf);
    }

    /**
     * Burstiness of a term in a single slice: the share of the term's
     * occurrences in that slice minus the uniform share {@code 1/m}.
     *
     * @param m     total number of slices
     * @param cf    total count of the term
     * @param freqK count of the term in the slice
     * @return the signed discrepancy for the slice
     */
    public float burstiness(float m, float cf, float freqK){
        return freqK/cf-1/m;
    }

    /**
     * Creates the algorithm with its three parameters (all initially empty)
     * and its description.
     */
    public Discrepancy() {
        super();
        parameters = FXCollections.observableArrayList(new AlgorithmParameter("minTermSupport",""),new AlgorithmParameter("maxTermSupport",""),new AlgorithmParameter("smooth",""));
        algoDescription = "Identifies bursty intervals of terms using a discrepancy-based model of burstiness";
    }

    /**
     * Returns the bibliographic reference for this algorithm as an HTML list item.
     *
     * @return the HTML-formatted reference
     */
    @Override
    public String getReference() {
        return "<li><b>Discrepancy Model of Burstiness:</b> T. Lappas, B. Arai, M. Platakis, D. Kotsakos, and D. Gunopulos. On burstiness-aware search for document sequences, <i>In Proceedings of the 15th ACM SIGKDD international conference on Knowledge discovery and data mining</i>, pp. 477-486, 2009.</li>";
    }
}

0 comments on commit fa5d326

Please sign in to comment.
You can’t perform that action at this time.