Skip to content

Commit

Permalink
Adjusted Premis events for non mbox import/export.
Browse files Browse the repository at this point in the history
  • Loading branch information
jfarwer committed Jan 15, 2023
1 parent 79e1620 commit 2e04227
Show file tree
Hide file tree
Showing 13 changed files with 202 additions and 295 deletions.
12 changes: 9 additions & 3 deletions WebContent/export.jsp
Expand Up @@ -467,9 +467,15 @@ Error: Export is only available in processing or appraisal modes!
}
var post_params={archiveID:archiveID, data:format, type:exportoptions};
var params = epadd.convertParamsToAmpersandSep(post_params);
var premisData = {eventType: "mbox export", eventDetailInformation: exportoptions};
var type = "export";
if (format == "to-mbox")
{
type = "mbox export"
}
else {
type = "eml export"
}
var premisData = {eventType: type, eventDetailInformation: exportoptions};
fetch_page_with_progress("ajax/downloadData.jsp", "status", document.getElementById('status'), document.getElementById('status_text'), params, null, null, premisData);
});
</script>
Expand Down
50 changes: 25 additions & 25 deletions WebContent/header.jspf
Expand Up @@ -183,31 +183,31 @@

<ul style="list-style:none;display:inline; padding:0" class="nav navbar-nav navbar-right">

<li class="dropdown language-selector">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" data-close-others="true">
<img id="active" src="images/icons8-great-britain-16.png" />
</a>
<ul class="dropdown-menu pull-right">
<li>
<a href="javascript:chang_lang('de')">
<img src="images/icons8-germany-16.png" />
<span>Deutsch</span>
</a>
</li>
<li>
<a href="javascript:chang_lang('en')">
<img src="images/icons8-great-britain-16.png" />
<span>English</span>
</a>
</li>
<li>
<a href="javascript:chang_lang('fr')">
<img src="images/icons8-france-16.png" />
<span>Fran�ois</span>
</a>
</li>
</ul>
</li>
<%--<li class="dropdown language-selector">--%>
<%-- <a href="#" class="dropdown-toggle" data-toggle="dropdown" data-close-others="true">--%>
<%-- <img id="active" src="images/icons8-great-britain-16.png" />--%>
<%-- </a>--%>
<%-- <ul class="dropdown-menu pull-right">--%>
<%-- <li>--%>
<%-- <a href="javascript:chang_lang('de')">--%>
<%-- <img src="images/icons8-germany-16.png" />--%>
<%-- <span>Deutsch</span>--%>
<%-- </a>--%>
<%-- </li>--%>
<%-- <li>--%>
<%-- <a href="javascript:chang_lang('en')">--%>
<%-- <img src="images/icons8-great-britain-16.png" />--%>
<%-- <span>English</span>--%>
<%-- </a>--%>
<%-- </li>--%>
<%-- <li>--%>
<%-- <a href="javascript:chang_lang('fr')">--%>
<%-- <img src="images/icons8-france-16.png" />--%>
<%-- <span>Français</span>--%>
<%-- </a>--%>
<%-- </li>--%>
<%-- </ul>--%>
<%--</li> --%>

<li class="dropdown" id="options">
<a id="help" title="help" class="dropdown-toggle" data-toggle="dropdown" href="#">
Expand Down
3 changes: 0 additions & 3 deletions WebContent/js/epadd.js
Expand Up @@ -193,9 +193,6 @@ function continueLogin(post_params) {
return;
}

//This is a bit of a hack to make the function isMbox() working.
post_params.emailSource2 = "mbox " + post_params.emailSource2;

var is_valid_account = [];
$('.account .input-field').removeClass('has-error'); // remove all input fields marked with an error

Expand Down
235 changes: 0 additions & 235 deletions epadd.iml

This file was deleted.

8 changes: 4 additions & 4 deletions src/java/edu/stanford/epadd/util/EmailConvert.java
Expand Up @@ -96,6 +96,10 @@ public static String getLicenseStatus() {
// return "License active";
// }

if (licenseStatus.endsWith("."))
{
licenseStatus = licenseStatus.substring(0, licenseStatus.length() -1);
}
return licenseStatus;

}
Expand Down Expand Up @@ -156,7 +160,6 @@ private void setUpConversion(HttpServletRequest request) throws UnsupportedEncod
if (directoryListing != null) {
for (File child : directoryListing) {
files.add(child);
System.out.println("child " + child);
}
} else {
files.add(dir);// Handle the case where dir is not really a directory.
Expand Down Expand Up @@ -238,7 +241,6 @@ private void convertFolder(File source, String outFolder, Format inFormat) {
//Just mbox for now.
OutputFormat outFormat = OutputFormat.forName("mbox");
converter = new Converter(source, inFormat, destination, outFormat);
System.out.println("Version -----------" + Settings.getVersion());
converter.addConverterListener(this);
currentFolder = source;
//converter.withLogFile("C:\\Users\\jochen\\log.log");
Expand Down Expand Up @@ -268,13 +270,11 @@ public void openingFile(File file) {
/**
 * Listener callback for a converted message (this object is registered on the converter via
 * {@code converter.addConverterListener(this)}). Counts the message and, on every 100th one,
 * publishes a progress status naming the folder currently being converted.
 *
 * @param date unused by this implementation
 * @param s    unused by this implementation
 * @param file unused by this implementation
 */
@Override
public void messageConverted(Date date, String s, File file) {
    nMessageConverted++;
    // Only every 100th message produces a status update; nothing is written to stdout,
    // so large conversions do not flood the console with one line per message.
    if (nMessageConverted % 100 != 0) {
        return;
    }
    // currentFolder may not have been set yet; fall back to an empty folder name.
    String currentFolderString = (currentFolder != null) ? currentFolder.toString() : "";
    sendStatus(lastMessage + " " + nMessageConverted + " messages converted (" + converter.getPercentComplete() + "%) for folder " + currentFolderString);
}
Expand Down
135 changes: 126 additions & 9 deletions src/java/edu/stanford/muse/email/EmailFetcherThread.java
Expand Up @@ -90,6 +90,8 @@ public class EmailFetcherThread implements Runnable, Serializable {
public static final Date
INVALID_DATE; // like 0xdeadbeef

private static Set<String> alreadyReadNonMboxFiles = new HashSet<>();

static {
Calendar c = new GregorianCalendar();
c.set(Calendar.YEAR, 1960);
Expand Down Expand Up @@ -123,6 +125,66 @@ public class EmailFetcherThread implements Runnable, Serializable {
final EmailFetcherStats stats = new EmailFetcherStats();
String currentStatus;

/**
 * Empties the static {@code alreadyReadNonMboxFiles} set in place, so that no non-mbox file
 * path is remembered as already read.
 */
public static void clearAlreadyReadNonMboxFiles() {
alreadyReadNonMboxFiles.clear();
}

/**
 * Discards all per-file counters by pointing {@code nonMboxData} at a new, empty map.
 * The previous map object is replaced rather than cleared, so code still holding a reference
 * to it keeps seeing the old entries.
 */
public static void clearNonMboxData() {
nonMboxData = new HashMap<>();
}

/**
 * Returns the counters stored in {@code nonMboxData} for {@code fileName}. When the map has no
 * (non-null) entry for that key, a new {@code NonMboxData} with all counters at zero is created
 * and stored first.
 *
 * @param fileName key under which the counters are kept
 * @return the counters object held in the map for {@code fileName}; never {@code null}
 */
private static NonMboxData getOrCreateNonMboxData(String fileName)
{
    NonMboxData data = nonMboxData.get(fileName);
    if (data == null)
    {
        data = new NonMboxData();
        nonMboxData.put(fileName, data);
    }
    return data;
}

/**
 * Adds {@code n} to the message counter kept for {@code fileName}.
 *
 * @param fileName key of the counters entry to update (created if missing)
 * @param n        amount to add to {@code nMessages}
 */
private void addToNIngestedMessagesFromNonMbox(String fileName, int n)
{
    getOrCreateNonMboxData(fileName).nMessages += n;
}

/**
 * Increases the folder counter kept for {@code fileName} by one.
 *
 * @param fileName key of the counters entry to update (created if missing)
 */
private void incrementNIngestedFoldersFromNonMbox(String fileName)
{
    getOrCreateNonMboxData(fileName).nFolders++;
}

/**
 * Adds {@code n} to the duplicate counter kept for {@code fileName}.
 *
 * @param fileName key of the counters entry to update (created if missing)
 * @param n        amount to add to {@code nDuplicates}
 */
private void addToNDuplicatesFromNonMbox(String fileName, int n)
{
    getOrCreateNonMboxData(fileName).nDuplicates += n;
}

/**
 * Adds {@code n} to the error counter kept for {@code fileName}.
 *
 * @param fileName key of the counters entry to update (created if missing)
 * @param n        amount to add to {@code nErrors}
 */
private void addToNErrorFromNonMbox(String fileName, int n)
{
    getOrCreateNonMboxData(fileName).nErrors += n;
}

/**
 * Plain holder for four integer counters, all starting at zero. Instances are stored as the
 * values of the {@code nonMboxData} map.
 */
public static class NonMboxData
{
// Increased by addToNIngestedMessagesFromNonMbox.
int nMessages = 0;
// Increased by addToNDuplicatesFromNonMbox.
int nDuplicates = 0;
// Incremented by one per call to incrementNIngestedFoldersFromNonMbox.
int nFolders = 0;
// Increased by addToNErrorFromNonMbox.
int nErrors = 0;
}
public static HashMap<String, NonMboxData> nonMboxData = new HashMap<>();

private int totalMessagesInFetch;
private int messagesCompletedInFetch; // this fetcher may be part of a bigger fetch operation. we need to track the progress of the bigger fetch in order to track progress accurately.
Expand Down Expand Up @@ -1503,6 +1565,8 @@ protected Message[] openFolderAndGetMessages() throws MessagingException {
* In order to make indexing of large archives possible, fetch of NON-MBOXEmailstrore formats is penalised. It is possible to avoid this by handling MBox and IMAP/POP formats differently.
*/
public void run() {
//Currently run() is only called directly and not by using start(). Therefore this is not part of a multi threaded
//operation and we don't have to worry about synchronization issues.
currentStatus = JSONUtils.getStatusJSON("Reading " + getFolderName());

isCancelled = false;
Expand Down Expand Up @@ -1559,15 +1623,49 @@ public void run() {
}
log.info("Fetch stats for this fetcher thread: " + stats);
}
try {
String fileName;

EpaddEvent.EventType eventType;
String pathOnDisk;

if (getFolderName().contains(EmailConvert.getTmpDir())) {
eventType = EpaddEvent.EventType.NON_MBOX_INGEST;
String mBoxPathWithNonMboxFileName = FolderInfo.removeTmpPartOfPath(getFolderName());
String nonMboxFileName = FolderInfo.getFirstPartOfPAth(mBoxPathWithNonMboxFileName);
fileName = nonMboxFileName;
pathOnDisk = emailStore.displayName;
if (new File(emailStore.displayName).isDirectory()) {
pathOnDisk = pathOnDisk + fileName;
}
} else {
eventType = EpaddEvent.EventType.MBOX_INGEST;
pathOnDisk = getFolderName();
}
boolean nonMboxFileAlreadyInPremis = false;
if (eventType == EpaddEvent.EventType.NON_MBOX_INGEST) {
nonMboxFileAlreadyInPremis = isNonMboxFileAlreadyInPremis(nMessages, pathOnDisk);
}
//In case of Mbox ingest we create an event for each ingested file. In case of a non mbox ingest we create the
//event once for each ingested non mbox file and not for each folder contained in the non mbox file.
if (eventType == EpaddEvent.EventType.MBOX_INGEST) {// || !nonMboxFileAlreadyInPremis) {
archive.getEpaddPremis().createEvent(eventType, nMessages + " messages - " + nErrors + " errors - Duplicate messages: " + stats.nMessagesAlreadyPresent, "success", "file name", pathOnDisk);
}
if (eventType == EpaddEvent.EventType.MBOX_INGEST || !nonMboxFileAlreadyInPremis) {
archive.getEpaddPremis().createFileObject(pathOnDisk, new File(pathOnDisk).length());
}

archive.getEpaddPremis().createEvent(EpaddEvent.EventType.MBOX_INGEST, nMessages + " messages - " + nErrors + " errors - Duplicate messages: " + stats.nMessagesAlreadyPresent, "success", "file name", getFolderName());
archive.getEpaddPremis().createFileObject(getFolderName(),new File(getFolderName()).length());
archive.getEpaddPremis().addToSignificantProperty("mbox file count", 1);
archive.getEpaddPremis().addToSignificantProperty("overall message_count", nMessages);
archive.getEpaddPremis().addToSignificantProperty("overall unique message_count", nMessages - stats.nMessagesAlreadyPresent);
archive.getEpaddPremis().setSignificantProperty("overall unique attachment count", archive.blobStore.uniqueBlobs.size());
//archive.getEpaddPremis().updateSignificantProperty("overall attachment count",);
archive.getEpaddPremis().setSignificantProperty("overall unique image count", archive.getNImageAttachments());
//archive.getEpaddPremis().addToSignificantProperty("mbox file count", 1);
archive.getEpaddPremis().addToSignificantProperty("overall message_count", nMessages);
archive.getEpaddPremis().addToSignificantProperty("overall unique message_count", nMessages - stats.nMessagesAlreadyPresent);
archive.getEpaddPremis().setSignificantProperty("overall unique attachment count", archive.blobStore.uniqueBlobs.size());
//archive.getEpaddPremis().updateSignificantProperty("overall attachment count",);
archive.getEpaddPremis().setSignificantProperty("overall unique image count", archive.getNImageAttachments());
}
catch (Exception e)
{
log.error("Exception creating Premis event in EmailFetcherThread " + e);
}
// archive.getEpaddPremis().setSignificantProperty("overall unique attached email count", archive.getNAttachedEmails());
// archive.getEpaddPremis().setSignificantProperty("overall unique word attachment count", archive.getNAttachmentsForSuffix("doc", "docx"));
// archive.getEpaddPremis().setSignificantProperty("overall unique ppt attachment count", archive.getNAttachmentsForSuffix("ppt", "pptx"));
Expand Down Expand Up @@ -1647,7 +1745,26 @@ public void run() {
}
}

/*
// Reports whether pathOnDisk was already recorded by an earlier call, and records it now.
//
// A non mbox file may contain a number of folders, and this method is called with the same
// pathOnDisk for each of them. The set alreadyReadNonMboxFiles remembers the paths seen so far
// (until clearAlreadyReadNonMboxFiles() is called), so the caller can tell the first folder of
// a file apart from the later ones.
//
// Regardless of the result, the per-file counters for pathOnDisk are updated: nMessages is
// added to the message count, stats.nMessagesAlreadyPresent to the duplicate count, nErrors to
// the error count, and the folder count goes up by one.
//
// Returns true if pathOnDisk had been recorded before this call, false if this call is the
// first one for that path.
private boolean isNonMboxFileAlreadyInPremis(int nMessages, String pathOnDisk) {
    // Set.add returns false when the element was already present, so one call both answers
    // "seen before?" and records the path -- no manual scan over the set is needed.
    boolean nonMboxFileAlreadyInPremis = !alreadyReadNonMboxFiles.add(pathOnDisk);

    addToNIngestedMessagesFromNonMbox(pathOnDisk, nMessages);
    addToNDuplicatesFromNonMbox(pathOnDisk, stats.nMessagesAlreadyPresent);
    incrementNIngestedFoldersFromNonMbox(pathOnDisk);
    addToNErrorFromNonMbox(pathOnDisk, nErrors);
    return nonMboxFileAlreadyInPremis;
}

/*
* code for handling other kinds of headers, e.g. to find location of the
* message -- not used right now, but may use in the future.
* public void processHeaders(MimeMessage m) throws Exception
Expand Down
2 changes: 1 addition & 1 deletion src/java/edu/stanford/muse/email/FolderInfo.java
Expand Up @@ -92,7 +92,7 @@ public static boolean hasTrailingSlash(String s) {
return lastIndexOfSlash == s.length() - 1;
}

private static String getFirstPartOfPAth(String s)
static String getFirstPartOfPAth(String s)
{
//Looking for the index of File.separator doesn't work. We make any / or \ being /:
s = s.replace("\\", "/");
Expand Down
16 changes: 15 additions & 1 deletion src/java/edu/stanford/muse/email/MuseEmailFetcher.java
Expand Up @@ -19,6 +19,7 @@
import edu.stanford.muse.AddressBookManager.AddressBook;
import edu.stanford.muse.datacache.Blob;
import edu.stanford.muse.datacache.BlobStore;
import edu.stanford.muse.epaddpremis.EpaddEvent;
import edu.stanford.muse.exceptions.CancelledException;
import edu.stanford.muse.exceptions.MboxFolderNotReadableException;
import edu.stanford.muse.exceptions.NoDefaultFolderException;
Expand Down Expand Up @@ -582,7 +583,20 @@ public void fetchAndIndexEmails(Archive archive, String[] selectedFolders, boole
stats.userKey = "USER KEY UNUSED"; // (String) JSPHelper.getSessionAttribute(session, "userKey");
stats.fetchAndIndexTimeMillis = elapsedMillis;

updateStats(archive, addressBook, stats);
try {
updateStats(archive, addressBook, stats);
for (Map.Entry<String, EmailFetcherThread.NonMboxData> entry : EmailFetcherThread.nonMboxData.entrySet()) {
EmailFetcherThread.NonMboxData data = entry.getValue();
String fileName = entry.getKey();
archive.getEpaddPremis().createEvent(EpaddEvent.EventType.NON_MBOX_INGEST, "Folders ingested: " + data.nFolders + " - Messages ingested: " + data.nMessages + " - Duplicate messages: " + data.nDuplicates + " - Errors: " + data.nErrors, "success", "file name", entry.getKey());// + " messages - " + nErrors + " errors t, "success", "file name", pathOnDisk);
}
EmailFetcherThread.clearAlreadyReadNonMboxFiles();
EmailFetcherThread.clearNonMboxData();
}
catch (Exception e)
{
log.error("Exception creating Premis event " + e);
}
//if (session != null)
// session.removeAttribute("statusProvider");
log.info ("Fetch+index complete: " + Util.commatize(System.currentTimeMillis() - startTime) + " ms");
Expand Down
13 changes: 10 additions & 3 deletions src/java/edu/stanford/muse/epaddpremis/EpaddEvent.java
Expand Up @@ -108,7 +108,13 @@ public enum EventType {
// 2022-09-01 Added IMAP_INGEST
// 2022-11-03 Added more EventType
// TRANSFER_TO_PROCESSING("transfer to processing"), TRANSFER_TO_DISCOVERY_AND_DELIVERY("transfer to discovery and delivery"), MBOX_INGEST("mbox ingest"), MBOX_EXPORT("mbox export"), EXPORT_FOR_PRESERVATION("export for preservation"), NOT_RECOGNIZED("not recognized");
TRANSFER_TO_PROCESSING("transfer to processing"), TRANSFER_TO_DISCOVERY_AND_DELIVERY("transfer to discovery and delivery"), MBOX_INGEST("mbox ingest"), MBOX_EXPORT("mbox export"), EXPORT_FOR_PRESERVATION("export for preservation"), NOT_RECOGNIZED("not recognized"),
TRANSFER_TO_PROCESSING("transfer to processing"),
TRANSFER_TO_DISCOVERY_AND_DELIVERY("transfer to discovery and delivery"),
MBOX_INGEST("mbox ingest"),
MBOX_EXPORT("mbox export"),
EML_EXPORT("eml export"),
NON_MBOX_INGEST("non Mbox ingest"),
EXPORT_FOR_PRESERVATION("export for preservation"),
IMAP_INGEST("imap ingest"),

INGESTION("Ingestion"),
Expand All @@ -125,7 +131,8 @@ public enum EventType {
NORMALIZATION("Normalization"),
TRANSCRIPTION("Transcription"),
CREATION("Creation"),
OTHER("Other")
OTHER("Other"),
NOT_RECOGNIZED("not recognized")
;

private final String eventType;
Expand Down Expand Up @@ -255,7 +262,7 @@ private void setOs()

/**
 * Appends the running Java version, read from the {@code java.version} system property, to
 * {@code linkingAgentIdentifierValue}.
 */
private void setJavaVersion()
{
    // Append the version exactly once, labelled "Java" so the number is unambiguous in the
    // identifier string.
    linkingAgentIdentifierValue += (" - running in Java " + System.getProperty("java.version"));
}

private static class LinkingAgentRole implements Serializable {
Expand Down
2 changes: 2 additions & 0 deletions src/java/edu/stanford/muse/epaddpremis/EpaddPremis.java
Expand Up @@ -257,6 +257,8 @@ public void createEvent(EpaddEvent.EventType eventType, String eventDetailInform
/**
 * Creates an {@code EpaddEvent} from a JSON description and records it.
 * The event is built from {@code eventJsonObject} together with the display mode that
 * {@code ModeConfig} reports for this archive's ID, appended to {@code epaddEvents}, and then
 * {@code printToFiles()} is called.
 *
 * @param eventJsonObject JSON object handed to the {@code EpaddEvent} constructor
 */
public void createEvent(JSONObject eventJsonObject) {
epaddEvents.add(new EpaddEvent(eventJsonObject, ModeConfig.getModeForDisplay(ArchiveReaderWriter.getArchiveIDForArchive(archive))));
printToFiles();


}

private String getXmlPathAndFileName() {
Expand Down
Expand Up @@ -5,8 +5,6 @@

import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlTransient;
import javax.xml.bind.annotation.adapters.XmlAdapter;
import javax.xml.bind.annotation.adapters.XmlJavaTypeAdapter;
import java.io.Serializable;
import java.util.HashSet;
import java.util.Set;
Expand All @@ -32,7 +30,7 @@ public SignificantProperties() {

public static Set<String> initialise() {
Set<String> initialProperties = new HashSet<>();
initialProperties.add("mbox file count");
// initialProperties.add("mbox file count");
initialProperties.add("overall message_count");
initialProperties.add("overall unique message_count");
initialProperties.add("overall unique attachment count");
Expand Down

0 comments on commit 2e04227

Please sign in to comment.