diff --git a/DumpReader.cs b/DumpReader.cs new file mode 100644 index 0000000..39e6295 --- /dev/null +++ b/DumpReader.cs @@ -0,0 +1,73 @@ +using System; +using System.Collections.Generic; +using System.Text; +using System.Data; +using System.Xml; +using System.Data.SqlClient; + +namespace SoSlow { + + + delegate string ColumnValidator(string name, string value); + + class DumpReader : MinimalDataReader { + + DataTable schema; + DataColumn nameColumn; + ColumnValidator validator; + XmlTextReader reader; + + public DumpReader(string filename, string target, SqlConnection connection, ColumnValidator validator) { + using (var cmd = connection.CreateCommand()) { + cmd.CommandType = CommandType.Text; + cmd.CommandText = "select top 1 * from " + target; + using (var reader = cmd.ExecuteReader()) { + schema = reader.GetSchemaTable(); + } + } + + nameColumn = schema.Columns[0]; + + this.reader = new XmlTextReader(filename); + this.validator = validator; + } + + int rowNumber; + public override bool Read() { + rowNumber++; + bool gotRow = false; + while (reader.Read()) { + if (reader.Name == "row") { + gotRow = true; + break; + } + } + return gotRow; + } + + public override void Dispose() { + reader.Close(); + } + + public override int FieldCount { + get { return schema.Rows.Count; } + } + + public override object GetValue(int i) { + string name = (string)schema.Rows[i][nameColumn]; + return ValidateOrDefault(name, reader.GetAttribute(name)); + } + + private string ValidateOrDefault(string name, string data) { + if (validator != null) { + return validator(name, data); + } + return data; + } + + public override DataTable GetSchemaTable() { + return schema; + } + + } +} diff --git a/Importer.cs b/Importer.cs index a963de7..b8dfb96 100644 --- a/Importer.cs +++ b/Importer.cs @@ -62,193 +62,5 @@ class Importer { - delegate string ColumnValidator(string name, string value); - - class DumpReader : IDataReader { - - - - DataTable schema; - DataColumn nameColumn; - ColumnValidator validator; - XmlTextReader reader; - - public DumpReader(string filename, string target, SqlConnection connection, ColumnValidator validator) { - using (var cmd = connection.CreateCommand()) { - cmd.CommandType = CommandType.Text; - cmd.CommandText = "select top 1 * from " + target; - using (var reader = cmd.ExecuteReader()) { - schema = reader.GetSchemaTable(); - } - } - - nameColumn = schema.Columns[0]; - - this.reader = new XmlTextReader(filename); - this.validator = validator; - } - - #region IDataReader Members - - public void Close() { - throw new NotImplementedException(); - } - - public int Depth { - get { throw new NotImplementedException(); } - } - - public DataTable GetSchemaTable() { - return schema; - } - - public bool IsClosed { - get { throw new NotImplementedException(); } - } - - public bool NextResult() { - throw new NotImplementedException(); - } - - int rowNumber; - public bool Read() { - rowNumber++; - bool gotRow = false; - while (reader.Read()) { - if (reader.Name == "row") { - gotRow = true; - break; - } - } - return gotRow; - } - - public int RecordsAffected { - get { throw new NotImplementedException(); } - } - - #endregion - - #region IDisposable Members - - public void Dispose() { - reader.Close(); - } - - #endregion - - #region IDataRecord Members - - public int FieldCount { - get { return schema.Rows.Count; } - } - - public bool GetBoolean(int i) { - throw new NotImplementedException(); - } - - public byte GetByte(int i) { - throw new NotImplementedException(); - } - - public long GetBytes(int i, long fieldOffset, byte[] buffer, int bufferoffset, int length) { - throw new NotImplementedException(); - } - - public char GetChar(int i) { - throw new NotImplementedException(); - } - - public long GetChars(int i, long fieldoffset, char[] buffer, int bufferoffset, int length) { - throw new NotImplementedException(); - } - - public IDataReader GetData(int i) { - throw new NotImplementedException(); - } - - public string GetDataTypeName(int i) { - throw new NotImplementedException(); - } - - public DateTime GetDateTime(int i) { - throw new NotImplementedException(); - } - - public decimal GetDecimal(int i) { - throw new NotImplementedException(); - } - - public double GetDouble(int i) { - throw new NotImplementedException(); - } - - public Type GetFieldType(int i) { - throw new NotImplementedException(); - } - - public float GetFloat(int i) { - throw new NotImplementedException(); - } - - public Guid GetGuid(int i) { - throw new NotImplementedException(); - } - - public short GetInt16(int i) { - throw new NotImplementedException(); - } - - public int GetInt32(int i) { - throw new NotImplementedException(); - } - - public long GetInt64(int i) { - throw new NotImplementedException(); - } - - public string GetName(int i) { - throw new NotImplementedException(); - } - - public int GetOrdinal(string name) { - throw new NotImplementedException(); - } - - public string GetString(int i) { - throw new NotImplementedException(); - } - - public object GetValue(int i) { - string name = (string)schema.Rows[i][nameColumn]; - return ValidateOrDefault(name, reader.GetAttribute(name)); - } - - private string ValidateOrDefault(string name, string data) { - if (validator != null) { - return validator(name, data); - } - return data; - } - - public int GetValues(object[] values) { - throw new NotImplementedException(); - } - - public bool IsDBNull(int i) { - throw new NotImplementedException(); - } - - public object this[string name] { - get { throw new NotImplementedException(); } - } - - public object this[int i] { - get { throw new NotImplementedException(); } - } - - #endregion - } - } } diff --git a/MainForm.cs b/MainForm.cs index 8a8c6ed..c1c6f94 100644 --- a/MainForm.cs +++ b/MainForm.cs @@ -72,10 +72,7 @@ public partial class MainForm : Form { SqlConnection cnn = new SqlConnection(connectionString.Text); cnn.Open(); - using (var cmd = cnn.CreateCommand()) { - cmd.CommandText = LoadResource("SoSlow.RecreateDB.sql"); - cmd.ExecuteNonQuery(); - } + CreateDB(cnn); string[] files = new string[] { "comments", "badges", "posts", "users", "votes" }; @@ -83,7 +80,10 @@ public partial class MainForm : Form { foreach (var file in files) { Importer importer = new Importer( - string.Format("c:\\temp\\{0}.xml",file), TitleCase(file), cnn); + Path.Combine(location.Text, string.Format("{0}.xml",file)), + TitleCase(file), + cnn + ); importer.Progress += new EventHandler(importer_Progress); importers.Add(importer); } @@ -97,9 +97,12 @@ public partial class MainForm : Form { importer.Import(); } - SetProgressMessage("Importing cc_wiki_field!"); - ImportCCWiki(cnn); + ImportCCWiki(cnn); + + SetProgressMessage("Creating Tag Refs!"); + baseProgressMessage = "Impoting tag refs"; + ImportTagRefs(cnn); SetProgressMessage("Done !"); EnableImportButton(); @@ -107,6 +110,29 @@ public partial class MainForm : Form { } + private void ImportTagRefs(SqlConnection cnn) { + + SqlBulkCopy copy = new SqlBulkCopy(cnn, SqlBulkCopyOptions.TableLock, null); + copy.DestinationTableName = "PostTags"; + copy.BatchSize = 5000; + copy.NotifyAfter = 5000; + copy.SqlRowsCopied += new SqlRowsCopiedEventHandler(copy_SqlRowsCopied); + using (var reader = new TagReader(connectionString.Text)) { + copy.WriteToServer(reader); + } + } + + void copy_SqlRowsCopied(object sender, SqlRowsCopiedEventArgs e) { + importer_Progress(this, new ProgressEventArgs() { RowsImported = (int)e.RowsCopied }); + } + + private void CreateDB(SqlConnection cnn) { + using (var cmd = cnn.CreateCommand()) { + cmd.CommandText = LoadResource("SoSlow.RecreateDB.sql"); + cmd.ExecuteNonQuery(); + } + } + private void ImportCCWiki(SqlConnection cnn) { using (var cmd = cnn.CreateCommand()) { diff --git a/MinimalDataReader.cs b/MinimalDataReader.cs new file mode 100644 index 0000000..8792581 --- /dev/null +++ b/MinimalDataReader.cs @@ -0,0 +1,143 @@ +using System; +using System.Collections.Generic; +using System.Text; +using System.Data; + +namespace SoSlow { + // minimal reader for bulk importer + abstract class MinimalDataReader : IDataReader { + + public abstract void Dispose(); + + public abstract int FieldCount { + get; + } + + public abstract object GetValue(int i); + + public abstract DataTable GetSchemaTable(); + + public abstract bool Read(); + + + public int RecordsAffected { + get {throw new NotImplementedException();} + } + + public void Close() { + throw new NotImplementedException(); + } + + public int Depth { + get { throw new NotImplementedException(); } + } + + + public bool IsClosed { + get { throw new NotImplementedException(); } + } + + public bool NextResult() { + throw new NotImplementedException(); + } + + public bool GetBoolean(int i) { + throw new NotImplementedException(); + } + + public byte GetByte(int i) { + throw new NotImplementedException(); + } + + public long GetBytes(int i, long fieldOffset, byte[] buffer, int bufferoffset, int length) { + throw new NotImplementedException(); + } + + public char GetChar(int i) { + throw new NotImplementedException(); + } + + public long GetChars(int i, long fieldoffset, char[] buffer, int bufferoffset, int length) { + throw new NotImplementedException(); + } + + public IDataReader GetData(int i) { + throw new NotImplementedException(); + } + + public string GetDataTypeName(int i) { + throw new NotImplementedException(); + } + + public DateTime GetDateTime(int i) { + throw new NotImplementedException(); + } + + public decimal GetDecimal(int i) { + throw new NotImplementedException(); + } + + public double GetDouble(int i) { + throw new NotImplementedException(); + } + + public Type GetFieldType(int i) { + throw new NotImplementedException(); + } + + public float GetFloat(int i) { + throw new NotImplementedException(); + } + + public Guid GetGuid(int i) { + throw new NotImplementedException(); + } + + public short GetInt16(int i) { + throw new NotImplementedException(); + } + + public int GetInt32(int i) { + throw new NotImplementedException(); + } + + public long GetInt64(int i) { + throw new NotImplementedException(); + } + + public string GetName(int i) { + throw new NotImplementedException(); + } + + public int GetOrdinal(string name) { + throw new NotImplementedException(); + } + + public string GetString(int i) { + throw new NotImplementedException(); + } + + + public int GetValues(object[] values) { + throw new NotImplementedException(); + } + + public bool IsDBNull(int i) { + throw new NotImplementedException(); + } + + public object this[string name] { + get { throw new NotImplementedException(); } + } + + public object this[int i] { + get { throw new NotImplementedException(); } + } + + + + + + + } +} diff --git a/RecreateDB.sql b/RecreateDB.sql index 28052f2..d2c9ad4 100644 --- a/RecreateDB.sql +++ b/RecreateDB.sql @@ -17,6 +17,15 @@ IF OBJECT_ID(N'[dbo].[Votes]') is not null IF OBJECT_ID(N'[dbo].[VoteTypes]') is not null DROP TABLE [VoteTypes] +IF OBJECT_ID(N'[dbo].[Tags]') is not null + DROP TABLE [Tags] + + +IF OBJECT_ID(N'[dbo].[PostTags]') is not null + DROP TABLE [PostTags] + + + CREATE TABLE [dbo].[Badges] ( [Id] [int] IDENTITY ( 1 , 1 ) NOT NULL ,[UserId] [int] NULL @@ -88,6 +97,22 @@ CONSTRAINT [PK_Votes] PRIMARY KEY CLUSTERED ( [Id] ASC ) WITH ( PAD_INDEX = OFF, ON [PRIMARY] + +CREATE TABLE [dbo].[Tags] ( +[Id] [int] identity primary key NOT NULL +,[TagName] varchar(255) NULL +) +ON [PRIMARY] + + +CREATE TABLE [dbo].[PostTags] ( + PostId int, + TagId int +) +ON [PRIMARY] + +create unique clustered index PostTagsIndex on PostTags (PostId,TagId) + create table VoteTypes ( Id int primary key, Name varchar(40)) insert VoteTypes diff --git a/SoSlow.csproj b/SoSlow.csproj index ea7cf4e..e21ec57 100644 --- a/SoSlow.csproj +++ b/SoSlow.csproj @@ -47,6 +47,7 @@ + Form @@ -54,6 +55,7 @@ MainForm.cs + @@ -81,6 +83,8 @@ Settings.settings True + +